1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 07/18/01 aliu Creation. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef UNIMATCH_H 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UNIMATCH_H 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \file 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \brief C++ API: Unicode Matcher 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass Replaceable; 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeString; 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeSet; 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constants returned by <code>UnicodeMatcher::matches()</code> 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indicating the degree of match. 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum UMatchDegree { 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constant returned by <code>matches()</code> indicating a 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * mismatch between the text and this matcher. The text contains 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a character which does not match, or the text does not contain 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * all desired characters for a non-incremental match. 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_MISMATCH, 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constant returned by <code>matches()</code> indicating a 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * partial match between the text and this matcher. This value is 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * only returned for incremental match operations. All characters 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the text match, but more characters are required for a 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * complete match. Alternatively, for variable-length matchers, 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * all characters of the text match, and if more characters were 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * supplied at limit, they might also match. 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_PARTIAL_MATCH, 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constant returned by <code>matches()</code> indicating a 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * complete match between the text and this matcher. For an 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * incremental variable-length match, this value is returned if 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the given text matches, and it is known that additional 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * characters would not alter the extent of the match. 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_MATCH 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>UnicodeMatcher</code> defines a protocol for objects that can 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * match a range of characters in a Replaceable string. 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~UnicodeMatcher(); 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return a UMatchDegree value indicating the degree of match for 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the given text at the given offset. Zero, one, or more 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * characters may be matched. 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Matching in the forward direction is indicated by limit > 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offset. Characters from offset forwards to limit-1 will be 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * considered for matching. 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Matching in the reverse direction is indicated by limit < 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offset. Characters from offset backwards to limit+1 will be 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * considered for matching. 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If limit == offset then the only match possible is a zero 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * character match (which subclasses may implement if desired). 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * As a side effect, advance the offset parameter to the limit of 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the matched substring. In the forward direction, this will be 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the index of the last matched character plus one. In the 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reverse direction, this will be the index of the last matched 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * character minus one. 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p>Note: This method is not const because some classes may 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * modify their state as the result of a match. 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param text the text to be matched 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param offset on input, the index into text at which to begin 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * matching. On output, the limit of the matched text. The 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * number of matched characters is the output value of offset 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * minus the input value. Offset should always point to the 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * HIGH SURROGATE (leading code unit) of a pair of surrogates, 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * both on entry and upon return. 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param limit the limit index of text to be matched. Greater 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * than offset for a forward direction match, less than offset for 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a backward direction match. The last character to be 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * considered for matching will be text.charAt(limit-1) in the 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * forward direction or text.charAt(limit+1) in the backward 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * direction. 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param incremental if TRUE, then assume further characters may 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be inserted at limit and check for partial matching. Otherwise 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * assume the text as given is complete. 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return a match degree value indicating a full match, a partial 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * match, or a mismatch. If incremental is FALSE then 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * U_PARTIAL_MATCH should never be returned. 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UMatchDegree matches(const Replaceable& text, 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& offset, 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental) = 0; 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a string representation of this matcher. If the result of 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * calling this function is passed to the appropriate parser, it 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * will produce another matcher that is equal to this one. 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param result the string to receive the pattern. Previous 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * contents will be deleted. 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param escapeUnprintable if TRUE then convert unprintable 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * character to their hex escape representations, \\uxxxx or 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \\Uxxxxxxxx. Unprintable characters are those other than 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * U+000A, U+0020..U+007E. 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UnicodeString& toPattern(UnicodeString& result, 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable = FALSE) const = 0; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns TRUE if this matcher will match a character c, where c 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * & 0xFF == v, at offset, in the forward direction (with limit > 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offset). This is used by <tt>RuleBasedTransliterator</tt> for 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indexing. 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UBool matchesIndexValue(uint8_t v) const = 0; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Union the set of all characters that may be matched by this object 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * into the given set. 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param toUnionTo the set into which to union the source characters 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.4 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 164