1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Copyright (C) 2001-2011, International Business Machines Corporation 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and others. All Rights Reserved. 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************** 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Date Name Description 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 07/23/01 aliu Creation. 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef STRMATCH_H 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define STRMATCH_H 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unifunct.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unimatch.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unirepl.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleData; 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * An object that matches a fixed input string, implementing the 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeMatcher API. This object also implements the 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API, allowing it to emit the matched text as 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * output. Since the match text may contain flexible match elements, 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * such as UnicodeSets, the emitted text is not the match pattern, but 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * instead a substring of the actual matched text. Following 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * convention, the output text is the leftmost match seen up to this 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * point. 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A StringMatcher may represent a segment, in which case it has a 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * positive segment number. This affects how the matcher converts 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * itself to a pattern but does not otherwise affect its function. 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A StringMatcher that is not a segment should not be used as a 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer. 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer { 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru public: 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Construct a matcher that matches the given pattern string. 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param string the pattern to be matched, possibly containing 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stand-ins that represent nested UnicodeMatcher objects. 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param start inclusive start index of text to be replaced 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param limit exclusive end index of text to be replaced; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * must be greater than or equal to start 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param segmentNum the segment number from 1..n, or 0 if this is 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not a segment. 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param data context object mapping stand-ins to 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeMatcher objects. 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru StringMatcher(const UnicodeString& string, 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t segmentNum, 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const TransliterationRuleData& data); 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param o the object to be copied. 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru StringMatcher(const StringMatcher& o); 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~StringMatcher(); 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return a copy of the object. 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UnicodeFunctor* clone() const; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer. 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the UnicodeMatcher point. 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UnicodeMatcher* toMatcher() const; 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer. 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the UnicodeReplacer pointer. 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UnicodeReplacer* toReplacer() const; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param text the text to be matched 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param offset on input, the index into text at which to begin 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * matching. On output, the limit of the matched text. The 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * number of matched characters is the output value of offset 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * minus the input value. Offset should always point to the 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * HIGH SURROGATE (leading code unit) of a pair of surrogates, 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * both on entry and upon return. 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param limit the limit index of text to be matched. Greater 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * than offset for a forward direction match, less than offset for 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a backward direction match. The last character to be 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * considered for matching will be text.charAt(limit-1) in the 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * forward direction or text.charAt(limit+1) in the backward 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * direction. 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param incremental if TRUE, then assume further characters may 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be inserted at limit and check for partial matching. Otherwise 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * assume the text as given is complete. 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return a match degree value indicating a full match, a partial 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * match, or a mismatch. If incremental is FALSE then 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * U_PARTIAL_MATCH should never be returned. 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UMatchDegree matches(const Replaceable& text, 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& offset, 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental); 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param result Output param to receive the pattern. 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param escapeUnprintable if True then escape the unprintable characters. 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A reference to 'result'. 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UnicodeString& toPattern(UnicodeString& result, 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable = FALSE) const; 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns TRUE if this matcher will match a character c, where c 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * & 0xFF == v, at offset, in the forward direction (with limit > 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offset). This is used by <tt>RuleBasedTransliterator</tt> for 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indexing. 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param v the given value 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return TRUE if this matcher will match a character c, 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * where c & 0xFF == v 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UBool matchesIndexValue(uint8_t v) const; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void setData(const TransliterationRuleData*); 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Replace characters in 'text' from 'start' to 'limit' with the 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * output text of this object. Update the 'cursor' parameter to 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * give the cursor position and return the length of the 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * replacement text. 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param text the text to be matched 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param start inclusive start index of text to be replaced 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param limit exclusive end index of text to be replaced; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * must be greater than or equal to start 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param cursor output parameter for the cursor position. 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Not all replacer objects will update this, but in a complete 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * tree of replacer objects, representing the entire output side 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of a transliteration rule, at least one must update it. 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the number of 16-bit code units in the text replacing 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the characters at offsets start..(limit-1) in text 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t replace(Replaceable& text, 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& cursor); 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a string representation of this replacer. If the 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * result of calling this function is passed to the appropriate 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * parser, typically TransliteratorParser, it will produce another 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * replacer that is equal to this one. 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param result the string to receive the pattern. Previous 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * contents will be deleted. 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param escapeUnprintable if TRUE then convert unprintable 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * character to their hex escape representations, \\uxxxx or 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \\Uxxxxxxxx. Unprintable characters are defined by 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Utility.isUnprintable(). 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return a reference to 'result'. 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UnicodeString& toReplacerPattern(UnicodeString& result, 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable) const; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove any match data. This must be called before performing a 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * set of matches with this segment. 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void resetMatch(); 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for the actual class. 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UClassID getDynamicClassID() const; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for this class. 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UClassID U_EXPORT2 getStaticClassID(); 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Union the set of all characters that may output by this object 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * into the given set. 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param toUnionTo the set into which to union the output characters 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru private: 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The text to be matched. 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString pattern; 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Context object that maps stand-ins to matcher and replacer 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * objects. 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const TransliterationRuleData* data; 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The segment number, 1-based, or 0 if not a segment. 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t segmentNumber; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Start offset, in the match text, of the <em>rightmost</em> 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * match. 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t matchStart; 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Limit offset, in the match text, of the <em>rightmost</em> 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * match. 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t matchLimit; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 251