1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10package android.icu.text;
11
12import java.util.ArrayList;
13import java.util.List;
14
15import android.icu.impl.UtilityExtensions;
16
17/**
18 * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
19 * the transliteration in one direction from one set of characters or short
20 * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
21 * two such sets, one for the forward direction, and one for the reverse.
22 *
23 * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
24 * finding a matching rule at a given point in the text.  This is accomplished
25 * by the <code>findMatch()</code> method.
26 *
27 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
28 *
29 * @author Alan Liu
30 */
31class TransliterationRuleSet {
32    /**
33     * Vector of rules, in the order added.
34     */
35    private List<TransliterationRule> ruleVector;
36
37    /**
38     * Length of the longest preceding context
39     */
40    private int maxContextLength;
41
42    /**
43     * Sorted and indexed table of rules.  This is created by freeze() from
44     * the rules in ruleVector.  rules.length >= ruleVector.size(), and the
45     * references in rules[] are aliases of the references in ruleVector.
46     * A single rule in ruleVector is listed one or more times in rules[].
47     */
48    private TransliterationRule[] rules;
49
50    /**
51     * Index table.  For text having a first character c, compute x = c&0xFF.
52     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
53     * freeze().
54     */
55    private int[] index;
56
57    /**
58     * Construct a new empty rule set.
59     */
60    public TransliterationRuleSet() {
61        ruleVector = new ArrayList<TransliterationRule>();
62        maxContextLength = 0;
63    }
64
65    /**
66     * Return the maximum context length.
67     * @return the length of the longest preceding context.
68     */
69    public int getMaximumContextLength() {
70        return maxContextLength;
71    }
72
73    /**
74     * Add a rule to this set.  Rules are added in order, and order is
75     * significant.
76     * @param rule the rule to add
77     */
78    public void addRule(TransliterationRule rule) {
79        ruleVector.add(rule);
80        int len;
81        if ((len = rule.getAnteContextLength()) > maxContextLength) {
82            maxContextLength = len;
83        }
84
85        rules = null;
86    }
87
88    /**
89     * Close this rule set to further additions, check it for masked rules,
90     * and index it to optimize performance.
91     * @exception IllegalArgumentException if some rules are masked
92     */
93    public void freeze() {
94        /* Construct the rule array and index table.  We reorder the
95         * rules by sorting them into 256 bins.  Each bin contains all
96         * rules matching the index value for that bin.  A rule
97         * matches an index value if string whose first key character
98         * has a low byte equal to the index value can match the rule.
99         *
100         * Each bin contains zero or more rules, in the same order
101         * they were found originally.  However, the total rules in
102         * the bins may exceed the number in the original vector,
103         * since rules that have a variable as their first key
104         * character will generally fall into more than one bin.
105         *
106         * That is, each bin contains all rules that either have that
107         * first index value as their first key character, or have
108         * a set containing the index value as their first character.
109         */
110        int n = ruleVector.size();
111        index = new int[257]; // [sic]
112        List<TransliterationRule> v = new ArrayList<TransliterationRule>(2*n); // heuristic; adjust as needed
113
114        /* Precompute the index values.  This saves a LOT of time.
115         */
116        int[] indexValue = new int[n];
117        for (int j=0; j<n; ++j) {
118            TransliterationRule r = ruleVector.get(j);
119            indexValue[j] = r.getIndexValue();
120        }
121        for (int x=0; x<256; ++x) {
122            index[x] = v.size();
123            for (int j=0; j<n; ++j) {
124                if (indexValue[j] >= 0) {
125                    if (indexValue[j] == x) {
126                        v.add(ruleVector.get(j));
127                    }
128                } else {
129                    // If the indexValue is < 0, then the first key character is
130                    // a set, and we must use the more time-consuming
131                    // matchesIndexValue check.  In practice this happens
132                    // rarely, so we seldom tread this code path.
133                    TransliterationRule r = ruleVector.get(j);
134                    if (r.matchesIndexValue(x)) {
135                        v.add(r);
136                    }
137                }
138            }
139        }
140        index[256] = v.size();
141
142        /* Freeze things into an array.
143         */
144        rules = new TransliterationRule[v.size()];
145        v.toArray(rules);
146
147        StringBuilder errors = null;
148
149        /* Check for masking.  This is MUCH faster than our old check,
150         * which was each rule against each following rule, since we
151         * only have to check for masking within each bin now.  It's
152         * 256*O(n2^2) instead of O(n1^2), where n1 is the total rule
153         * count, and n2 is the per-bin rule count.  But n2<<n1, so
154         * it's a big win.
155         */
156        for (int x=0; x<256; ++x) {
157            for (int j=index[x]; j<index[x+1]-1; ++j) {
158                TransliterationRule r1 = rules[j];
159                for (int k=j+1; k<index[x+1]; ++k) {
160                    TransliterationRule r2 = rules[k];
161                    if (r1.masks(r2)) {
162                        if (errors == null) {
163                            errors = new StringBuilder();
164                        } else {
165                            errors.append("\n");
166                        }
167                        errors.append("Rule " + r1 + " masks " + r2);
168                    }
169                }
170            }
171        }
172
173        if (errors != null) {
174            throw new IllegalArgumentException(errors.toString());
175        }
176    }
177
178    /**
179     * Transliterate the given text with the given UTransPosition
180     * indices.  Return TRUE if the transliteration should continue
181     * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
182     * Note that FALSE is only ever returned if isIncremental is TRUE.
183     * @param text the text to be transliterated
184     * @param pos the position indices, which will be updated
185     * @param incremental if TRUE, assume new text may be inserted
186     * at index.limit, and return FALSE if thre is a partial match.
187     * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
188     * indicating that transliteration should stop until more text
189     * arrives.
190     */
191    public boolean transliterate(Replaceable text,
192                                 Transliterator.Position pos,
193                                 boolean incremental) {
194        int indexByte = text.char32At(pos.start) & 0xFF;
195        for (int i=index[indexByte]; i<index[indexByte+1]; ++i) {
196            int m = rules[i].matchAndReplace(text, pos, incremental);
197            switch (m) {
198            case UnicodeMatcher.U_MATCH:
199                if (Transliterator.DEBUG) {
200                    System.out.println((incremental ? "Rule.i: match ":"Rule: match ") +
201                                       rules[i].toRule(true) + " => " +
202                                       UtilityExtensions.formatInput(text, pos));
203                }
204                return true;
205            case UnicodeMatcher.U_PARTIAL_MATCH:
206                if (Transliterator.DEBUG) {
207                    System.out.println((incremental ? "Rule.i: partial match ":"Rule: partial match ") +
208                                       rules[i].toRule(true) + " => " +
209                                       UtilityExtensions.formatInput(text, pos));
210                }
211                return false;
212                default:
213                    if (Transliterator.DEBUG) {
214                        System.out.println("Rule: no match " + rules[i]);
215                    }
216            }
217        }
218        // No match or partial match from any rule
219        pos.start += UTF16.getCharCount(text.char32At(pos.start));
220        if (Transliterator.DEBUG) {
221            System.out.println((incremental ? "Rule.i: no match => ":"Rule: no match => ") +
222                               UtilityExtensions.formatInput(text, pos));
223        }
224        return true;
225    }
226
227    /**
228     * Create rule strings that represents this rule set.
229     */
230    String toRules(boolean escapeUnprintable) {
231        int i;
232        int count = ruleVector.size();
233        StringBuilder ruleSource = new StringBuilder();
234        for (i=0; i<count; ++i) {
235            if (i != 0) {
236                ruleSource.append('\n');
237            }
238            TransliterationRule r = ruleVector.get(i);
239            ruleSource.append(r.toRule(escapeUnprintable));
240        }
241        return ruleSource.toString();
242    }
243
244    // TODO Handle the case where we have :: [a] ; a > |b ; b > c ;
245    // TODO Merge into r.addSourceTargetSet, to avoid duplicate testing
246    void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
247        UnicodeSet currentFilter = new UnicodeSet(filter);
248        UnicodeSet revisiting = new UnicodeSet();
249        int count = ruleVector.size();
250        for (int i=0; i<count; ++i) {
251            TransliterationRule r = ruleVector.get(i);
252            r.addSourceTargetSet(currentFilter, sourceSet, targetSet, revisiting.clear());
253            currentFilter.addAll(revisiting);
254        }
255    }
256
257}
258