rbt_pars.h revision 83a171d1a62abf406f7f44ae671823d5ec20db7d
1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 1999-2011, International Business Machines Corporation
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All Rights Reserved.
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Date        Name        Description
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   11/17/99    aliu        Creation.
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBT_PARS_H
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBT_PARS_H
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
1683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#ifdef __cplusplus
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parseerr.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unorm.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "hash.h"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uvector.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Forward declarations: within this header these classes are used only by
// pointer, by reference, or in a friend declaration.
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleData;
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeFunctor;
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass ParseData;
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RuleHalf;
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass ParsePosition;
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass StringMatcher;
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Parser for transliterator rule strings.  parse() processes a rule string
 * in a given direction; the results are then read from the public data
 * members dataVector, idBlockVector and compoundFilter.  The copy
 * constructor and assignment operator are declared private without an
 * implementation, so instances are not meant to be copied.
 */
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliteratorParser : public UMemory {
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru public:
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * PUBLIC data member.  A Vector of TransliterationRuleData objects, one for
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * each discrete group of rules in the rule set.
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector dataVector;
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * PUBLIC data member.
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * A Vector of UnicodeStrings containing all of the ID blocks in the rule set.
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector idBlockVector;
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * PUBLIC data member containing the parsed compound filter, if any.
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeSet* compoundFilter;
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru private:
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The current data object for which we are parsing rules.
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRuleData* curData;
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
    /**
     * Transliteration direction.  NOTE(review): presumably the direction
     * passed to parse(); confirm against the implementation.
     */
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UTransDirection direction;
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Parse error information.
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UParseError parseError;
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Temporary symbol table used during parsing.
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ParseData* parseData;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Temporary vector of matcher variables.  When parsing is complete, this
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * is copied into the array data.variables.  As with data.variables,
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * element 0 corresponds to character data.variablesBase.
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector variablesVector;
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Temporary table of variable names.  When parsing is complete, this is
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * copied into data.variableNames.
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Hashtable variableNames;
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * String of standins for segments.  Used during the parsing of a single
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * rule.  segmentStandins.charAt(0) is the standin for "$1" and corresponds
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * to StringMatcher object segmentObjects.elementAt(0), etc.
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString segmentStandins;
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Vector of StringMatcher objects for segments.  Used during the
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * parsing of a single rule.
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * segmentStandins.charAt(0) is the standin for "$1" and corresponds
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * to StringMatcher object segmentObjects.elementAt(0), etc.
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector segmentObjects;
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The next available stand-in for variables.  This starts at some point in
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the private use area (discovered dynamically) and increments up toward
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * <code>variableLimit</code>.  At any point during parsing, available
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * variables are <code>variableNext..variableLimit-1</code>.
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar variableNext;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The exclusive upper limit of the stand-ins for variables.  This is
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * discovered dynamically.  At any point during parsing, available
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * variables are <code>variableNext..variableLimit-1</code>.
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar variableLimit;
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * When we encounter an undefined variable, we do not immediately signal
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * an error, in case we are defining this variable, e.g., "$a = [a-z];".
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Instead, we save the name of the undefined variable, and substitute
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * in the placeholder char variableLimit - 1, and decrement
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * variableLimit.
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString undefinedVariableName;
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The stand-in character for the 'dot' set, represented by '.' in
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * patterns.  This is allocated the first time it is needed, and
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * reused thereafter.
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar dotStandIn;
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Constructor.
     * @param statusReturn  UErrorCode reference; presumably receives the
     *                      outcome of construction (confirm in the .cpp).
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliteratorParser(UErrorCode &statusReturn);
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Destructor.
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~TransliteratorParser();
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Parse the given string as a sequence of rules, separated by newline
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * characters ('\n'), and cause this object to implement those rules.  Any
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * previous rules are discarded.  Typically this method is called exactly
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * once after construction.
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Parse the given rules, in the given direction.  After this call
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * returns, query the public data members for results.  The caller
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * owns the 'data' and 'compoundFilter' data members after this
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * call returns.
     *
     * NOTE(review): no member named 'data' is declared in this class; the
     * text above presumably refers to the contents of dataVector -- confirm.
     * NOTE(review): the description says rules are separated by newlines
     * while the @param below says ';' -- confirm which one applies.
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param rules      rules, separated by ';'
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param direction  either FORWARD or REVERSE.
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param pe         Struct to receive information on position
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                   of error if an error is encountered
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param ec         Output param set to success/failure code.
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void parse(const UnicodeString& rules,
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               UTransDirection direction,
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               UParseError& pe,
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               UErrorCode& ec);
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Return the compound filter parsed by parse().  Caller owns result.
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return the compound filter parsed by parse().
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeSet* orphanCompoundFilter();
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Parse the given rules, in the given direction.
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param rules      the rules to parse (input; passed by const reference).
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param direction  either FORWARD or REVERSE.
     * @param status     UErrorCode reference; presumably receives the
     *                   failure code.
     *
     * NOTE(review): the earlier text here described returning the
     * transliterator as source rules through an output parameter, which does
     * not match this void method taking a const input; confirm the details
     * against the implementation.
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void parseRules(const UnicodeString& rules,
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UTransDirection direction,
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UErrorCode& status);
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * MAIN PARSER.  Parse the next rule in the given rule string, starting
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * at pos.  Return the index after the last character parsed.  Do not
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * parse characters at or after limit.
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Important:  The character at pos must be a non-whitespace character
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * that is not the comment character.
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * This method handles quoting, escaping, and whitespace removal.  It
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * parses the end-of-rule character.  It recognizes context and cursor
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * indicators.  Once it does a lexical breakdown of the rule at pos, it
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * creates a rule object and adds it to our rule list.
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param rule       the rule string to parse (input; passed by const reference).
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param pos        the starting position.
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param limit      index just past the last character of the rule.
     * @param status     UErrorCode reference; presumably receives the
     *                   failure code.
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return           the index after the last character parsed.
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t parseRule(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status);
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Set the variable range to [start, end] (inclusive).
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param start    the start value of the range.
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param end      the end value of the range.
     * @param status   UErrorCode reference; presumably receives the
     *                 failure code.
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void setVariableRange(int32_t start, int32_t end, UErrorCode& status);
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Assert that the given character is NOT within the variable range.
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * If it is, return FALSE.  This is necessary to ensure that the
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * variable range does not overlap characters used in a rule.
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param ch     the given character.
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return       True, if the given character is NOT within the variable range.
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool checkVariableRange(UChar32 ch) const;
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Set the maximum backup to 'backup', in response to a pragma
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * statement.
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param backup    the new value to be set.
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void pragmaMaximumBackup(int32_t backup);
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Begin normalizing all rules using the given mode, in response
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * to a pragma statement.
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param mode    the given mode.
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void pragmaNormalizeRules(UNormalizationMode mode);
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Return true if the given rule looks like a pragma.
     * @param rule the rule string to examine.
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param pos offset to the first non-whitespace character
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * of the rule.
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param limit index just past the last character of the rule.
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return true if the given rule looks like a pragma.
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UBool resemblesPragma(const UnicodeString& rule, int32_t pos, int32_t limit);
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Parse a pragma.  This method assumes resemblesPragma() has
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * already returned true.
     * @param rule the rule string containing the pragma.
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param pos offset to the first non-whitespace character
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * of the rule.
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param limit index just past the last character of the rule.
     * @param status UErrorCode reference; presumably receives the
     * failure code.
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return the position index after the final ';' of the pragma,
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * or -1 on failure.
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status);
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Called by main parser upon syntax error.  Search the rule string
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * for the probable end of the rule.  Of course, if the error is that
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the end of rule marker is missing, then the rule end will not be found.
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * In any case the rule start will be correctly reported.
     *
     * NOTE(review): the UnicodeString parameter is unnamed in this
     * declaration; whether it carries the message ('msg' below) or the rule
     * text being searched should be confirmed against the implementation.
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param parseErrorCode error code.
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param msg error description.
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param start position of first character of current rule.
     * @param status UErrorCode reference; presumably receives the
     * failure code.
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return start position of first character of current rule.
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t syntaxError(UErrorCode parseErrorCode, const UnicodeString&, int32_t start,
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UErrorCode& status);
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Parse a UnicodeSet out, store it, and return the stand-in character
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * used to represent it.
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param rule    the rule for UnicodeSet.
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param pos     the position in pattern at which to start parsing.
     * @param status  UErrorCode reference; presumably receives the
     *                failure code.
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return        the stand-in character used to represent it.
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar parseSet(const UnicodeString& rule,
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   ParsePosition& pos,
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UErrorCode& status);
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Generate and return a stand-in for a new UnicodeFunctor.  Store
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the matcher (adopt it).
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param adopted the UnicodeFunctor to be adopted.
     * @param status  UErrorCode reference; presumably receives the
     *                failure code.
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return        a stand-in for a new UnicodeFunctor.
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar generateStandInFor(UnicodeFunctor* adopted, UErrorCode& status);
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Return the standin for segment seg (1-based).
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param seg    the given segment.
     * @param status UErrorCode reference; presumably receives the
     *               failure code.
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return       the standIn character for the given segment.
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar getSegmentStandin(int32_t seg, UErrorCode& status);
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Set the object for segment seg (1-based).
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param seg      the given segment.
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param adopted  the StringMatcher to be adopted.
     * @param status   UErrorCode reference; presumably receives the
     *                 failure code.
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void setSegmentObject(int32_t seg, StringMatcher* adopted, UErrorCode& status);
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Return the stand-in for the dot set.  It is allocated the first
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * time and reused thereafter.
     * @param status UErrorCode reference; presumably receives the
     *            failure code.
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return    the stand-in for the dot set.
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar getDotStandIn(UErrorCode& status);
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Append the value of the given variable name to the given
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * UnicodeString.
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param name    the name of the variable whose value is appended.
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param buf     the given UnicodeString to append to.
     * @param status  UErrorCode reference; presumably receives the
     *                failure code.
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void appendVariableDef(const UnicodeString& name,
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UnicodeString& buf,
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UErrorCode& status);
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Glue method to get around access restrictions in C++.
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*static Transliterator* createBasicInstance(const UnicodeString& id,
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                               const UnicodeString* canonID);*/
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    friend class RuleHalf;
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Disallowed methods; no impl.
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Copy constructor (declared private, not implemented).
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliteratorParser(const TransliteratorParser&);
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Assignment operator (declared private, not implemented).
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliteratorParser& operator=(const TransliteratorParser&);
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
33883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif /* #ifdef __cplusplus */
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Strip/convert the following from the transliterator rules:
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * comments
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * newlines
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * white space at the beginning and end of a line
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * unescape \u notation
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The target buffer must be equal in size to the source.
 *
 * This declaration sits outside the __cplusplus guard, so it is also
 * visible to C translation units that include this header.
 * @param source     the rule text to strip (read-only).
 * @param sourceLen  length of source; NOTE(review): presumably counted in
 *                   UChar units -- confirm against the implementation.
 * @param target     buffer that receives the stripped rules.
 * @param status     pointer to a UErrorCode; presumably receives the
 *                   failure code.
 * @return           NOTE(review): presumably the length of the text written
 *                   to target -- confirm against the implementation.
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorCode *status);
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
356