/*
**********************************************************************
*   Copyright (C) 1999-2007, International Business Machines Corporation
*   and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#ifndef RBT_DATA_H
#define RBT_DATA_H

#include "unicode/utypes.h"
#include "unicode/uclean.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "rbt_set.h"
#include "hash.h"

U_NAMESPACE_BEGIN

class UnicodeFunctor;
class UnicodeMatcher;
class UnicodeReplacer;

/**
 * The rule data for a RuleBasedTransliterator.  RBT objects hold
 * a const pointer to a TRD object that they do not own.  TRD objects
 * are essentially the parsed rules in compact, usable form.  The
 * TRD objects themselves are held for the life of the process in
 * a static cache owned by Transliterator.
 *
 * This class's API is a little asymmetric.  There is a method to
 * define a variable, but no way to define a set.  This is because the
 * sets are defined by the parser in a UVector, and the vector is
 * copied into a fixed-size array here.  Once this is done, no new
 * sets may be defined.  In practice, there is no need to do so, since
 * generating the data and using it are discrete phases.  When there
 * is a need to access the set data during the parse phase, another
 * data structure handles this.  See the parsing code for more
 * details.
 *
 * All data members are public: the object is a plain container that
 * is filled in by the code that builds it and then read through the
 * lookup*() accessors declared below.
 */
class TransliterationRuleData : public UMemory {

public:

    // PUBLIC DATA MEMBERS

    /**
     * Rule table.  May be empty.
     */
    TransliterationRuleSet ruleSet;

    /**
     * Map variable name (String) to variable (UnicodeString).  A variable name
     * corresponds to zero or more characters, stored in a UnicodeString in
     * this hash.  One or more of these chars may also correspond to a
     * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
     * a stand-in: it is an index for a secondary lookup in
     * data.variables.  The stand-in also represents the UnicodeMatcher in
     * the stored rules.
     */
    Hashtable variableNames;

    /**
     * Map category variable (UChar) to set (UnicodeFunctor).
     * Variables that correspond to a set of characters are mapped
     * from variable name to a stand-in character in data.variableNames.
     * The stand-in then selects the actual UnicodeFunctor object in
     * this array.  In addition, the stand-in is
     * stored in the rule text to represent the set of characters.
     * variables[i] represents character (variablesBase + i), so the
     * array index for a stand-in c is (c - variablesBase).
     *
     * The array holds variablesLength entries; see variablesAreOwned
     * for who is responsible for it.
     */
    UnicodeFunctor** variables;

    /**
     * Flag that indicates whether the variables are owned (if a single
     * call to Transliterator::createFromRules() produces a CompoundTransliterator
     * with more than one RuleBasedTransliterator as children, they all share
     * the same variables list, so only the first one is considered to own
     * the variables).
     */
    UBool variablesAreOwned;

    /**
     * The character that represents variables[0].  Characters
     * variablesBase through variablesBase +
     * variablesLength - 1 represent UnicodeFunctor objects.
     */
    UChar variablesBase;

    /**
     * The length of variables, i.e. the number of stand-in characters
     * starting at variablesBase.
     */
    int32_t variablesLength;

public:

    /**
     * Constructor.
     * @param status Output param set to success/failure code on exit.
     */
    TransliterationRuleData(UErrorCode& status);

    /**
     * Copy constructor.
     * NOTE(review): whether the variables array is duplicated or shared
     * with the source object is decided in the implementation file, which
     * is not visible from this header -- confirm there before relying on
     * either behavior (see variablesAreOwned).
     */
    TransliterationRuleData(const TransliterationRuleData&);

    /**
     * Destructor.
     * NOTE(review): presumably releases the variables array only when
     * variablesAreOwned is set -- confirm in the implementation file.
     */
    ~TransliterationRuleData();

    /**
     * Given a stand-in character, return the UnicodeFunctor that it
     * represents, or NULL if it doesn't represent anything.
     * @param standIn the given stand-in character.
     * @return the UnicodeFunctor that 'standIn' represents, or NULL
     */
    UnicodeFunctor* lookup(UChar32 standIn) const;

    /**
     * Given a stand-in character, return the UnicodeMatcher that it
     * represents, or NULL if it doesn't represent anything or if it
     * represents something that is not a matcher.
     * @param standIn the given stand-in character.
     * @return the UnicodeMatcher that 'standIn' represents, or NULL
     */
    UnicodeMatcher* lookupMatcher(UChar32 standIn) const;

    /**
     * Given a stand-in character, return the UnicodeReplacer that it
     * represents, or NULL if it doesn't represent anything or if it
     * represents something that is not a replacer.
     * @param standIn the given stand-in character.
     * @return the UnicodeReplacer that 'standIn' represents, or NULL
     */
    UnicodeReplacer* lookupReplacer(UChar32 standIn) const;


private:
    /**
     * Declared private to forbid assignment of this class.  Copy
     * construction is still available through the public copy
     * constructor declared above.
     */
    TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid assignment of this class
};

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif