/*
**********************************************************************
*   Copyright (c) 2001-2004, International Business Machines Corporation
*   and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/23/01    aliu        Creation.
**********************************************************************
*/
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "strmatch.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_data.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar EMPTY[] = { 0 }; // empty string: ""
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher)
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringMatcher::StringMatcher(const UnicodeString& theString,
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             int32_t start,
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             int32_t limit,
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             int32_t segmentNum,
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             const TransliterationRuleData& theData) :
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    data(&theData),
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    segmentNumber(segmentNum),
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matchStart(-1),
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matchLimit(-1)
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    theString.extractBetween(start, limit, pattern);
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringMatcher::StringMatcher(const StringMatcher& o) :
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeFunctor(o),
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeMatcher(o),
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeReplacer(o),
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pattern(o.pattern),
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    data(o.data),
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    segmentNumber(o.segmentNumber),
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matchStart(o.matchStart),
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matchLimit(o.matchLimit)
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringMatcher::~StringMatcher() {
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFunctor* StringMatcher::clone() const {
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new StringMatcher(*this);
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer.
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher* StringMatcher::toMatcher() const {
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (UnicodeMatcher*) this;
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API.  Cast 'this' to a UnicodeReplacer* pointer
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer.
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeReplacer* StringMatcher::toReplacer() const {
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (UnicodeReplacer*) this;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUMatchDegree StringMatcher::matches(const Replaceable& text,
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    int32_t& offset,
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    int32_t limit,
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    UBool incremental) {
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i;
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t cursor = offset;
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (limit < cursor) {
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Match in the reverse direction
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (i=pattern.length()-1; i>=0; --i) {
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UChar keyChar = pattern.charAt(i);
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeMatcher* subm = data->lookupMatcher(keyChar);
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (subm == 0) {
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (cursor > limit &&
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    keyChar == text.charAt(cursor)) {
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    --cursor;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return U_MISMATCH;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UMatchDegree m =
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    subm->matches(text, cursor, limit, incremental);
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (m != U_MATCH) {
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return m;
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Record the match position, but adjust for a normal
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // forward start, limit, and only if a prior match does not
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // exist -- we want the rightmost match.
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (matchStart < 0) {
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchStart = cursor+1;
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchLimit = offset+1;
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (i=0; i<pattern.length(); ++i) {
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (incremental && cursor == limit) {
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // We've reached the context limit without a mismatch and
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // without completing our match.
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return U_PARTIAL_MATCH;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UChar keyChar = pattern.charAt(i);
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeMatcher* subm = data->lookupMatcher(keyChar);
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (subm == 0) {
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // Don't need the cursor < limit check if
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // incremental is TRUE (because it's done above); do need
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // it otherwise.
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (cursor < limit &&
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    keyChar == text.charAt(cursor)) {
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ++cursor;
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return U_MISMATCH;
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UMatchDegree m =
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    subm->matches(text, cursor, limit, incremental);
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (m != U_MATCH) {
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return m;
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Record the match position
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        matchStart = offset;
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        matchLimit = cursor;
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offset = cursor;
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return U_MATCH;
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& StringMatcher::toPattern(UnicodeString& result,
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        UBool escapeUnprintable) const
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    result.truncate(0);
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString str, quoteBuf;
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (segmentNumber > 0) {
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result.append((UChar)40); /*(*/
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (int32_t i=0; i<pattern.length(); ++i) {
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar keyChar = pattern.charAt(i);
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const UnicodeMatcher* m = data->lookupMatcher(keyChar);
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (m == 0) {
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendToRule(result, m->toPattern(str, escapeUnprintable),
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         TRUE, escapeUnprintable, quoteBuf);
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (segmentNumber > 0) {
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result.append((UChar)41); /*)*/
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Flush quoteBuf out to result
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ICU_Utility::appendToRule(result, -1,
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                              TRUE, escapeUnprintable, quoteBuf);
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool StringMatcher::matchesIndexValue(uint8_t v) const {
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (pattern.length() == 0) {
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c = pattern.char32At(0);
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UnicodeMatcher *m = data->lookupMatcher(c);
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 ch;
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) {
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = pattern.char32At(i);
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const UnicodeMatcher* matcher = data->lookupMatcher(ch);
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (matcher == NULL) {
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            toUnionTo.add(ch);
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matcher->addMatchSetTo(toUnionTo);
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t StringMatcher::replace(Replaceable& text,
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               int32_t start,
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               int32_t limit,
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               int32_t& /*cursor*/) {
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t outLen = 0;
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Copy segment with out-of-band data
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t dest = limit;
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // If there was no match, that means that a quantifier
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // matched zero-length.  E.g., x (a)* y matched "xy".
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (matchStart >= 0) {
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (matchStart != matchLimit) {
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            text.copy(matchStart, matchLimit, dest);
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            outLen = matchLimit - matchStart;
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    text.handleReplaceBetween(start, limit, EMPTY); // delete original text
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return outLen;
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                UBool /*escapeUnprintable*/) const {
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // assert(segmentNumber > 0);
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    rule.truncate(0);
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    rule.append((UChar)0x0024 /*$*/);
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ICU_Utility::appendNumber(rule, segmentNumber, 10, 1);
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return rule;
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove any match info.  This must be called before performing a
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * set of matches with this segment.
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void StringMatcher::resetMatch() {
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matchStart = matchLimit = -1;
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Union the set of all characters that may output by this object
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * into the given set.
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param toUnionTo the set into which to union the output characters
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringMatcher::addReplacementSetTo(UnicodeSet& /*toUnionTo*/) const {
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // The output of this replacer varies; it is the source text between
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // matchStart and matchLimit.  Since this varies depending on the
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // input text, we can't compute it here.  We can either do nothing
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // or we can add ALL characters to the set.  It's probably more useful
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // to do nothing.
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringMatcher::setData(const TransliterationRuleData* d) {
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    data = d;
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i = 0;
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (i<pattern.length()) {
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar32 c = pattern.char32At(i);
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UnicodeFunctor* f = data->lookup(c);
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (f != NULL) {
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            f->setData(data);
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i += UTF_CHAR_LENGTH(c);
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof
290