/*
* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/18/01    aliu        Creation.
**********************************************************************
*/
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef UNIMATCH_H
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UNIMATCH_H
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \file
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \brief C++ API: Unicode Matcher
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass Replaceable;
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeString;
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeSet;
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Constants returned by <code>UnicodeMatcher::matches()</code>
 * indicating the degree of match.
 * @stable ICU 2.4
 */
enum UMatchDegree {
    /**
     * Constant returned by <code>matches()</code> indicating a
     * mismatch between the text and this matcher.  The text contains
     * a character which does not match, or the text does not contain
     * all desired characters for a non-incremental match.
     * @stable ICU 2.4
     */
    U_MISMATCH,

    /**
     * Constant returned by <code>matches()</code> indicating a
     * partial match between the text and this matcher.  This value is
     * only returned for incremental match operations.  All characters
     * of the text match, but more characters are required for a
     * complete match.  Alternatively, for variable-length matchers,
     * all characters of the text match, and if more characters were
     * supplied at limit, they might also match.
     * @stable ICU 2.4
     */
    U_PARTIAL_MATCH,

    /**
     * Constant returned by <code>matches()</code> indicating a
     * complete match between the text and this matcher.  For an
     * incremental variable-length match, this value is returned if
     * the given text matches, and it is known that additional
     * characters would not alter the extent of the match.
     * @stable ICU 2.4
     */
    U_MATCH
};

63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>UnicodeMatcher</code> defines a protocol for objects that can
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * match a range of characters in a Replaceable string.
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Destructor.
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @stable ICU 2.4
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual ~UnicodeMatcher();
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Return a UMatchDegree value indicating the degree of match for
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the given text at the given offset.  Zero, one, or more
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * characters may be matched.
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Matching in the forward direction is indicated by limit >
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * offset.  Characters from offset forwards to limit-1 will be
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * considered for matching.
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Matching in the reverse direction is indicated by limit <
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * offset.  Characters from offset backwards to limit+1 will be
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * considered for matching.
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * If limit == offset then the only match possible is a zero
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * character match (which subclasses may implement if desired).
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * As a side effect, advance the offset parameter to the limit of
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the matched substring.  In the forward direction, this will be
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the index of the last matched character plus one.  In the
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * reverse direction, this will be the index of the last matched
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * character minus one.
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * <p>Note:  This method is not const because some classes may
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * modify their state as the result of a match.
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param text the text to be matched
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param offset on input, the index into text at which to begin
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * matching.  On output, the limit of the matched text.  The
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * number of matched characters is the output value of offset
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * minus the input value.  Offset should always point to the
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * HIGH SURROGATE (leading code unit) of a pair of surrogates,
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * both on entry and upon return.
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param limit the limit index of text to be matched.  Greater
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * than offset for a forward direction match, less than offset for
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * a backward direction match.  The last character to be
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * considered for matching will be text.charAt(limit-1) in the
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * forward direction or text.charAt(limit+1) in the backward
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * direction.
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param incremental if TRUE, then assume further characters may
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * be inserted at limit and check for partial matching.  Otherwise
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * assume the text as given is complete.
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return a match degree value indicating a full match, a partial
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * match, or a mismatch.  If incremental is FALSE then
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * U_PARTIAL_MATCH should never be returned.
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @stable ICU 2.4
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UMatchDegree matches(const Replaceable& text,
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 int32_t& offset,
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 int32_t limit,
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 UBool incremental) = 0;
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Returns a string representation of this matcher.  If the result of
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * calling this function is passed to the appropriate parser, it
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * will produce another matcher that is equal to this one.
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param result the string to receive the pattern.  Previous
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * contents will be deleted.
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param escapeUnprintable if TRUE then convert unprintable
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * character to their hex escape representations, \\uxxxx or
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * \\Uxxxxxxxx.  Unprintable characters are those other than
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * U+000A, U+0020..U+007E.
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @stable ICU 2.4
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UnicodeString& toPattern(UnicodeString& result,
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UBool escapeUnprintable = FALSE) const = 0;
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Returns TRUE if this matcher will match a character c, where c
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * & 0xFF == v, at offset, in the forward direction (with limit >
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * offset).  This is used by <tt>RuleBasedTransliterator</tt> for
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * indexing.
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @stable ICU 2.4
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UBool matchesIndexValue(uint8_t v) const = 0;
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Union the set of all characters that may be matched by this object
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * into the given set.
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param toUnionTo the set into which to union the source characters
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @stable ICU 2.4
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
164