// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBT_SET_H
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBT_SET_H
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utrans.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uvector.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass Replaceable;
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRule;
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleData;
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeFilter;
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeString;
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeSet;
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A set of rules for a <code>RuleBasedTransliterator</code>.
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @author Alan Liu
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleSet : public UMemory {
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Vector of rules, in the order added.  This is used while the
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * rule set is getting built.  After that, freeze() reorders and
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * indexes the rules into rules[].  Any given rule is stored once
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * in ruleVector, and one or more times in rules[].  ruleVector
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * owns and deletes the rules.
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector* ruleVector;
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Sorted and indexed table of rules.  This is created by freeze()
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * from the rules in ruleVector.  It contains alias pointers to
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the rules in ruleVector.  It is zero before freeze() is called
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * and non-zero thereafter.
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRule** rules;
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Index table.  For text having a first character c, compute x = c&0xFF.
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * freeze().  Before freeze() is called it contains garbage.
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t index[257];
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Length of the longest preceding context
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t maxContextLength;
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Construct a new empty rule set.
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param status    Output parameter filled in with success or failure status.
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRuleSet(UErrorCode& status);
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Copy constructor.
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRuleSet(const TransliterationRuleSet&);
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Destructor.
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual ~TransliterationRuleSet();
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Change the data object that this rule belongs to.  Used
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * internally by the TransliterationRuleData copy constructor.
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param data    the new data value to be set.
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void setData(const TransliterationRuleData* data);
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Return the maximum context length.
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return the length of the longest preceding context.
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual int32_t getMaximumContextLength(void) const;
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Add a rule to this set.  Rules are added in order, and order is
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * significant.  The last call to this method must be followed by
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * a call to <code>freeze()</code> before the rule set is used.
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * This method must <em>not</em> be called after freeze() has been
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * called.
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param adoptedRule the rule to add
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void addRule(TransliterationRule* adoptedRule,
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UErrorCode& status);
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Check this for masked rules and index it to optimize performance.
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The sequence of operations is: (1) add rules to a set using
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * <code>addRule()</code>; (2) freeze the set using
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * <code>freeze()</code>; (3) use the rule set.  If
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * <code>addRule()</code> is called after calling this method, it
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * invalidates this object, and this method must be called again.
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * That is, <code>freeze()</code> may be called multiple times,
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * although for optimal performance it shouldn't be.
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param parseError A pointer to UParseError to receive information about errors
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                   occurred.
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param status     Output parameter filled in with success or failure status.
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void freeze(UParseError& parseError, UErrorCode& status);
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Transliterate the given text with the given UTransPosition
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * indices.  Return TRUE if the transliteration should continue
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Note that FALSE is only ever returned if isIncremental is TRUE.
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param text the text to be transliterated
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param index the position indices, which will be updated
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param isIncremental if TRUE, assume new text may be inserted
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * at index.limit, and return FALSE if thre is a partial match.
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * indicating that transliteration should stop until more text
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * arrives.
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool transliterate(Replaceable& text,
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UTransPosition& index,
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UBool isIncremental);
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Create rule strings that represents this rule set.
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param result string to receive the rule strings.  Current
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * contents will be deleted.
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param escapeUnprintable  True, will escape the unprintable characters
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return    A reference to 'result'.
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UnicodeString& toRules(UnicodeString& result,
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                   UBool escapeUnprintable) const;
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Return the set of all characters that may be modified
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * (getTarget=false) or emitted (getTarget=true) by this set.
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeSet& getSourceTargetSet(UnicodeSet& result,
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UBool getTarget) const;
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
168