164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 1999-2007, International Business Machines Corporation 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All Rights Reserved. 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11/17/99 aliu Creation. 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBT_SET_H 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBT_SET_H 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utrans.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uvector.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass Replaceable; 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRule; 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleData; 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeFilter; 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeString; 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeSet; 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A set of rules for a <code>RuleBasedTransliterator</code>. 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @author Alan Liu 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleSet : public UMemory { 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Vector of rules, in the order added. This is used while the 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * rule set is getting built. After that, freeze() reorders and 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indexes the rules into rules[]. Any given rule is stored once 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in ruleVector, and one or more times in rules[]. ruleVector 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * owns and deletes the rules. 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector* ruleVector; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sorted and indexed table of rules. This is created by freeze() 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from the rules in ruleVector. It contains alias pointers to 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the rules in ruleVector. It is zero before freeze() is called 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and non-zero thereafter. 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRule** rules; 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Index table. For text having a first character c, compute x = c&0xFF. 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Now use rules[index[x]..index[x+1]-1]. This index table is created by 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * freeze(). Before freeze() is called it contains garbage. 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t index[257]; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Length of the longest preceding context 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t maxContextLength; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Construct a new empty rule set. 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Output parameter filled in with success or failure status. 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleSet(UErrorCode& status); 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleSet(const TransliterationRuleSet&); 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~TransliterationRuleSet(); 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Change the data object that this rule belongs to. Used 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * internally by the TransliterationRuleData copy constructor. 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param data the new data value to be set. 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void setData(const TransliterationRuleData* data); 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the maximum context length. 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the length of the longest preceding context. 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t getMaximumContextLength(void) const; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Add a rule to this set. Rules are added in order, and order is 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * significant. The last call to this method must be followed by 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a call to <code>freeze()</code> before the rule set is used. 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This method must <em>not</em> be called after freeze() has been 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * called. 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param adoptedRule the rule to add 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void addRule(TransliterationRule* adoptedRule, 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status); 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Check this for masked rules and index it to optimize performance. 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The sequence of operations is: (1) add rules to a set using 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>addRule()</code>; (2) freeze the set using 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>freeze()</code>; (3) use the rule set. If 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>addRule()</code> is called after calling this method, it 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * invalidates this object, and this method must be called again. 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * That is, <code>freeze()</code> may be called multiple times, 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * although for optimal performance it shouldn't be. 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param parseError A pointer to UParseError to receive information about errors 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * occurred. 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Output parameter filled in with success or failure status. 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void freeze(UParseError& parseError, UErrorCode& status); 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterate the given text with the given UTransPosition 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indices. Return TRUE if the transliteration should continue 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note that FALSE is only ever returned if isIncremental is TRUE. 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param text the text to be transliterated 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param index the position indices, which will be updated 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param isIncremental if TRUE, assume new text may be inserted 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * at index.limit, and return FALSE if thre is a partial match. 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return TRUE unless a U_PARTIAL_MATCH has been obtained, 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indicating that transliteration should stop until more text 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * arrives. 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool transliterate(Replaceable& text, 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransPosition& index, 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isIncremental); 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Create rule strings that represents this rule set. 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param result string to receive the rule strings. Current 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * contents will be deleted. 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param escapeUnprintable True, will escape the unprintable characters 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A reference to 'result'. 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UnicodeString& toRules(UnicodeString& result, 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable) const; 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the set of all characters that may be modified 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (getTarget=false) or emitted (getTarget=true) by this set. 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet& getSourceTargetSet(UnicodeSet& result, 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool getTarget) const; 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 168