/*
 * Copyright (C) 2001-2011, International Business Machines Corporation
 * and others. All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   07/23/01    aliu        Creation.
 **********************************************************************
 */
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef STRMATCH_H
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define STRMATCH_H
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unifunct.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unimatch.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unirepl.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleData;
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * An object that matches a fixed input string, implementing the
 * UnicodeMatcher API.  This object also implements the
 * UnicodeReplacer API, allowing it to emit the matched text as
 * output.  Since the match text may contain flexible match elements,
 * such as UnicodeSets, the emitted text is not the match pattern, but
 * instead a substring of the actual matched text.  Following
 * convention, the output text is the leftmost match seen up to this
 * point.
 *
 * A StringMatcher may represent a segment, in which case it has a
 * positive segment number.  This affects how the matcher converts
 * itself to a pattern but does not otherwise affect its function.
 *
 * A StringMatcher that is not a segment should not be used as a
 * UnicodeReplacer.
 */
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer {
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru public:
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Construct a matcher that matches the given pattern string.
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param string the pattern to be matched, possibly containing
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * stand-ins that represent nested UnicodeMatcher objects.
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param start inclusive start index of text to be replaced
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param limit exclusive end index of text to be replaced;
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * must be greater than or equal to start
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param segmentNum the segment number from 1..n, or 0 if this is
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * not a segment.
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param data context object mapping stand-ins to
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * UnicodeMatcher objects.
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    StringMatcher(const UnicodeString& string,
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  int32_t start,
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  int32_t limit,
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  int32_t segmentNum,
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  const TransliterationRuleData& data);
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Copy constructor
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param o  the object to be copied.
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    StringMatcher(const StringMatcher& o);
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Destructor
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual ~StringMatcher();
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Implement UnicodeFunctor
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return a copy of the object.
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UnicodeFunctor* clone() const;
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * and return the pointer.
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return the UnicodeMatcher point.
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UnicodeMatcher* toMatcher() const;
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * UnicodeFunctor API.  Cast 'this' to a UnicodeReplacer* pointer
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * and return the pointer.
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return the UnicodeReplacer pointer.
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UnicodeReplacer* toReplacer() const;
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Implement UnicodeMatcher
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param text the text to be matched
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param offset on input, the index into text at which to begin
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * matching.  On output, the limit of the matched text.  The
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * number of matched characters is the output value of offset
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * minus the input value.  Offset should always point to the
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * HIGH SURROGATE (leading code unit) of a pair of surrogates,
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * both on entry and upon return.
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param limit the limit index of text to be matched.  Greater
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * than offset for a forward direction match, less than offset for
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * a backward direction match.  The last character to be
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * considered for matching will be text.charAt(limit-1) in the
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * forward direction or text.charAt(limit+1) in the backward
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * direction.
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param incremental  if TRUE, then assume further characters may
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * be inserted at limit and check for partial matching.  Otherwise
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * assume the text as given is complete.
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return a match degree value indicating a full match, a partial
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * match, or a mismatch.  If incremental is FALSE then
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * U_PARTIAL_MATCH should never be returned.
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UMatchDegree matches(const Replaceable& text,
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 int32_t& offset,
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 int32_t limit,
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 UBool incremental);
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Implement UnicodeMatcher
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param result            Output param to receive the pattern.
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param escapeUnprintable if True then escape the unprintable characters.
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return                  A reference to 'result'.
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UnicodeString& toPattern(UnicodeString& result,
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UBool escapeUnprintable = FALSE) const;
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Implement UnicodeMatcher
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Returns TRUE if this matcher will match a character c, where c
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * & 0xFF == v, at offset, in the forward direction (with limit >
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * offset).  This is used by <tt>RuleBasedTransliterator</tt> for
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * indexing.
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param v    the given value
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return     TRUE if this matcher will match a character c,
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *             where c & 0xFF == v
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UBool matchesIndexValue(uint8_t v) const;
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Implement UnicodeMatcher
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Implement UnicodeFunctor
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void setData(const TransliterationRuleData*);
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Replace characters in 'text' from 'start' to 'limit' with the
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * output text of this object.  Update the 'cursor' parameter to
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * give the cursor position and return the length of the
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * replacement text.
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param text the text to be matched
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param start inclusive start index of text to be replaced
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param limit exclusive end index of text to be replaced;
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * must be greater than or equal to start
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param cursor output parameter for the cursor position.
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Not all replacer objects will update this, but in a complete
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * tree of replacer objects, representing the entire output side
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * of a transliteration rule, at least one must update it.
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return the number of 16-bit code units in the text replacing
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the characters at offsets start..(limit-1) in text
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual int32_t replace(Replaceable& text,
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            int32_t start,
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            int32_t limit,
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            int32_t& cursor);
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Returns a string representation of this replacer.  If the
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * result of calling this function is passed to the appropriate
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * parser, typically TransliteratorParser, it will produce another
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * replacer that is equal to this one.
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param result the string to receive the pattern.  Previous
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * contents will be deleted.
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param escapeUnprintable if TRUE then convert unprintable
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * character to their hex escape representations, \\uxxxx or
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * \\Uxxxxxxxx.  Unprintable characters are defined by
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Utility.isUnprintable().
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return a reference to 'result'.
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UnicodeString& toReplacerPattern(UnicodeString& result,
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             UBool escapeUnprintable) const;
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Remove any match data.  This must be called before performing a
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * set of matches with this segment.
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void resetMatch();
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for the actual class.
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UClassID getDynamicClassID() const;
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for this class.
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UClassID U_EXPORT2 getStaticClassID();
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Union the set of all characters that may output by this object
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * into the given set.
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param toUnionTo the set into which to union the output characters
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru private:
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The text to be matched.
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString pattern;
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Context object that maps stand-ins to matcher and replacer
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * objects.
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const TransliterationRuleData* data;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The segment number, 1-based, or 0 if not a segment.
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t segmentNumber;
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Start offset, in the match text, of the <em>rightmost</em>
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * match.
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t matchStart;
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Limit offset, in the match text, of the <em>rightmost</em>
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * match.
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t matchLimit;
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
251