/* rbt_pars.h revision 83a171d1a62abf406f7f44ae671823d5ec20db7d */
1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 1999-2011, International Business Machines Corporation 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11/17/99 aliu Creation. 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBT_PARS_H 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBT_PARS_H 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 1683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#ifdef __cplusplus 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parseerr.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unorm.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "hash.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include 
"uvector.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleData; 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeFunctor; 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass ParseData; 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RuleHalf; 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass ParsePosition; 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass StringMatcher; 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliteratorParser : public UMemory { 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru public: 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A Vector of TransliterationRuleData objects, one for each discrete group 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of rules in the rule set 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector dataVector; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * PUBLIC data member. 
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A Vector of UnicodeStrings containing all of the ID blocks in the rule set 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector idBlockVector; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * PUBLIC data member containing the parsed compound filter, if any. 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet* compoundFilter; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru private: 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The current data object for which we are parsing rules 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleData* curData; 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Parse error information. 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError parseError; 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Temporary symbol table used during parsing. 
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ParseData* parseData; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Temporary vector of matcher variables. When parsing is complete, this 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is copied into the array data.variables. As with data.variables, 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * element 0 corresponds to character data.variablesBase. 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector variablesVector; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Temporary table of variable names. When parsing is complete, this is 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * copied into data.variableNames. 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Hashtable variableNames; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * String of standins for segments. Used during the parsing of a single 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * rule. segmentStandins.charAt(0) is the standin for "$1" and corresponds 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to StringMatcher object segmentObjects.elementAt(0), etc. 
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString segmentStandins; 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Vector of StringMatcher objects for segments. Used during the 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * parsing of a single rule. 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * segmentStandins.charAt(0) is the standin for "$1" and corresponds 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to StringMatcher object segmentObjects.elementAt(0), etc. 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector segmentObjects; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The next available stand-in for variables. This starts at some point in 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the private use area (discovered dynamically) and increments up toward 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>variableLimit</code>. At any point during parsing, available 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * variables are <code>variableNext..variableLimit-1</code>. 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar variableNext; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The last available stand-in for variables. 
This is discovered 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dynamically. At any point during parsing, available variables are 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>variableNext..variableLimit-1</code>. 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar variableLimit; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When we encounter an undefined variable, we do not immediately signal 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * an error, in case we are defining this variable, e.g., "$a = [a-z];". 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Instead, we save the name of the undefined variable, and substitute 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in the placeholder char variableLimit - 1, and decrement 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * variableLimit. 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString undefinedVariableName; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The stand-in character for the 'dot' set, represented by '.' in 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * patterns. This is allocated the first time it is needed, and 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reused thereafter. 
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar dotStandIn; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructor. 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliteratorParser(UErrorCode &statusReturn); 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~TransliteratorParser(); 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Parse the given string as a sequence of rules, separated by newline 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * characters ('\n'), and cause this object to implement those rules. Any 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * previous rules are discarded. Typically this method is called exactly 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * once after construction. 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Parse the given rules, in the given direction. After this call 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returns, query the public data members for results. 
The caller 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * owns the 'data' and 'compoundFilter' data members after this 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * call returns. 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules rules, separated by ';' 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param direction either FORWARD or REVERSE. 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pe Struct to recieve information on position 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of error if an error is encountered 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param ec Output param set to success/failure code. 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void parse(const UnicodeString& rules, 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError& pe, 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& ec); 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the compound filter parsed by parse(). Caller owns result. 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the compound filter parsed by parse(). 
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet* orphanCompoundFilter(); 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return a representation of this transliterator as source rules. 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules Output param to receive the rules. 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param direction either FORWARD or REVERSE. 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void parseRules(const UnicodeString& rules, 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MAIN PARSER. Parse the next rule in the given rule string, starting 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * at pos. Return the index after the last character parsed. Do not 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * parse characters at or after limit. 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Important: The character at pos must be a non-whitespace character 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that is not the comment character. 
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This method handles quoting, escaping, and whitespace removal. It 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * parses the end-of-rule character. It recognizes context and cursor 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indicators. Once it does a lexical breakdown of the rule at pos, it 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * creates a rule object and adds it to our rule list. 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules Output param to receive the rules. 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pos the starting position. 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param limit pointer past the last character of the rule. 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the index after the last character parsed. 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t parseRule(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status); 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the variable range to [start, end] (inclusive). 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param start the start value of the range. 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param end the end value of the range. 
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void setVariableRange(int32_t start, int32_t end, UErrorCode& status); 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assert that the given character is NOT within the variable range. 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If it is, return FALSE. This is neccesary to ensure that the 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * variable range does not overlap characters used in a rule. 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param ch the given character. 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return True, if the given character is NOT within the variable range. 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool checkVariableRange(UChar32 ch) const; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the maximum backup to 'backup', in response to a pragma 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * statement. 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param backup the new value to be set. 
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void pragmaMaximumBackup(int32_t backup); 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Begin normalizing all rules using the given mode, in response 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to a pragma statement. 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param mode the given mode. 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void pragmaNormalizeRules(UNormalizationMode mode); 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return true if the given rule looks like a pragma. 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pos offset to the first non-whitespace character 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the rule. 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param limit pointer past the last character of the rule. 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return true if the given rule looks like a pragma. 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UBool resemblesPragma(const UnicodeString& rule, int32_t pos, int32_t limit); 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Parse a pragma. 
This method assumes resemblesPragma() has 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * already returned true. 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pos offset to the first non-whitespace character 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the rule. 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param limit pointer past the last character of the rule. 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the position index after the final ';' of the pragma, 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or -1 on failure. 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status); 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Called by main parser upon syntax error. Search the rule string 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for the probable end of the rule. Of course, if the error is that 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the end of rule marker is missing, then the rule end will not be found. 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In any case the rule start will be correctly reported. 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param parseErrorCode error code. 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param msg error description. 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param start position of first character of current rule. 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return start position of first character of current rule. 
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t syntaxError(UErrorCode parseErrorCode, const UnicodeString&, int32_t start, 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status); 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Parse a UnicodeSet out, store it, and return the stand-in character 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * used to represent it. 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rule the rule for UnicodeSet. 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pos the position in pattern at which to start parsing. 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the stand-in character used to represent it. 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar parseSet(const UnicodeString& rule, 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ParsePosition& pos, 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status); 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Generate and return a stand-in for a new UnicodeFunctor. Store 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the matcher (adopt it). 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param adopted the UnicodeFunctor to be adopted. 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return a stand-in for a new UnicodeFunctor. 
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar generateStandInFor(UnicodeFunctor* adopted, UErrorCode& status); 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the standin for segment seg (1-based). 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param seg the given segment. 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the standIn character for the given segment. 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar getSegmentStandin(int32_t seg, UErrorCode& status); 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the object for segment seg (1-based). 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param seg the given segment. 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param adopted the StringMatcher to be adopted. 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void setSegmentObject(int32_t seg, StringMatcher* adopted, UErrorCode& status); 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the stand-in for the dot set. It is allocated the first 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * time and reused thereafter. 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the stand-in for the dot set. 
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar getDotStandIn(UErrorCode& status); 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Append the value of the given variable name to the given 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeString. 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param name the variable name to be appended. 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param buf the given UnicodeString to append to. 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void appendVariableDef(const UnicodeString& name, 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString& buf, 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status); 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Glue method to get around access restrictions in C++. 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*static Transliterator* createBasicInstance(const UnicodeString& id, 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString* canonID);*/ 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru friend class RuleHalf; 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Disallowed methods; no impl. 
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliteratorParser(const TransliteratorParser&); 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliteratorParser& operator=(const TransliteratorParser&); 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif /* #ifdef __cplusplus */ 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Strip/convert the following from the transliterator rules: 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * comments 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * newlines 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * white space at the beginning and end of a line 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * unescape \u notation 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The target must be equal in size as the source. 
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorCode *status); 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 356