1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 1999-2007, International Business Machines Corporation 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11/17/99 aliu Creation. 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBT_DATA_H 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBT_DATA_H 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uclean.h" 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_set.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "hash.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeFunctor; 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeMatcher; 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeReplacer; 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The rule data for a RuleBasedTransliterators. RBT objects hold 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a const pointer to a TRD object that they do not own. TRD objects 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are essentially the parsed rules in compact, usable form. The 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TRD objects themselves are held for the life of the process in 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a static cache owned by Transliterator. 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This class' API is a little asymmetric. There is a method to 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * define a variable, but no way to define a set. This is because the 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sets are defined by the parser in a UVector, and the vector is 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * copied into a fixed-size array here. Once this is done, no new 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sets may be defined. In practice, there is no need to do so, since 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * generating the data and using it are discrete phases. When there 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is a need to access the set data during the parse phase, another 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * data structure handles this. See the parsing code for more 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * details. 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleData : public UMemory { 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // PUBLIC DATA MEMBERS 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Rule table. May be empty. 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleSet ruleSet; 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Map variable name (String) to variable (UnicodeString). A variable name 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * corresponds to zero or more characters, stored in a UnicodeString in 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this hash. One or more of these chars may also correspond to a 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeMatcher, in which case the character in the UnicodeString in this hash is 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a stand-in: it is an index for a secondary lookup in 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * data.variables. The stand-in also represents the UnicodeMatcher in 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the stored rules. 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Hashtable variableNames; 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Map category variable (UChar) to set (UnicodeFunctor). 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Variables that correspond to a set of characters are mapped 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from variable name to a stand-in character in data.variableNames. 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The stand-in then serves as a key in this hash to lookup the 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * actual UnicodeFunctor object. In addition, the stand-in is 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stored in the rule text to represent the set of characters. 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * variables[i] represents character (variablesBase + i). 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFunctor** variables; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Flag that indicates whether the variables are owned (if a single 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * call to Transliterator::createFromRules() produces a CompoundTransliterator 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with more than one RuleBasedTransliterator as children, they all share 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the same variables list, so only the first one is considered to own 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the variables) 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool variablesAreOwned; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The character that represents variables[0]. Characters 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * variablesBase through variablesBase + 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * variablesLength - 1 represent UnicodeFunctor objects. 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar variablesBase; 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The length of variables. 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t variablesLength; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructor 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Output param set to success/failure code on exit. 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleData(UErrorCode& status); 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy Constructor 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleData(const TransliterationRuleData&); 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * destructor 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~TransliterationRuleData(); 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Given a stand-in character, return the UnicodeFunctor that it 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * represents, or NULL if it doesn't represent anything. 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param standIn the given stand-in character. 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the UnicodeFunctor that 'standIn' represents 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFunctor* lookup(UChar32 standIn) const; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Given a stand-in character, return the UnicodeMatcher that it 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * represents, or NULL if it doesn't represent anything or if it 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * represents something that is not a matcher. 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param standIn the given stand-in character. 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return return the UnicodeMatcher that 'standIn' represents 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeMatcher* lookupMatcher(UChar32 standIn) const; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Given a stand-in character, return the UnicodeReplacer that it 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * represents, or NULL if it doesn't represent anything or if it 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * represents something that is not a replacer. 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param standIn the given stand-in character. 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return return the UnicodeReplacer that 'standIn' represents 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeReplacer* lookupReplacer(UChar32 standIn) const; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 153