/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package android.icu.text;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import android.icu.impl.UtilityExtensions;

/**
 * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
 * the transliteration in one direction from one set of characters or short
 * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
 * two such sets, one for the forward direction, and one for the reverse.
 *
 * <p>Typical use, as far as this class shows: rules are appended with
 * {@link #addRule}, the set is indexed with {@link #freeze}, and text is then
 * processed one position at a time with {@link #transliterate}.
 *
 * <p>Copyright © IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
 */
class TransliterationRuleSet {
    /**
     * Vector of rules, in the order added.
     */
    private List<TransliterationRule> ruleVector;

    /**
     * Length of the longest preceding context
     */
    private int maxContextLength;

    /**
     * Sorted and indexed table of rules.  This is created by freeze() from
     * the rules in ruleVector.  rules.length >= ruleVector.size(), and the
     * references in rules[] are aliases of the references in ruleVector.
     * A single rule in ruleVector is listed one or more times in rules[].
     */
    private TransliterationRule[] rules;

    /**
     * Index table.  For text having a first character c, compute x = c&0xFF.
     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
     * freeze().
     */
    private int[] index;

    /**
     * Construct a new empty rule set.
     */
    public TransliterationRuleSet() {
        ruleVector = new ArrayList<TransliterationRule>();
        maxContextLength = 0;
    }

    /**
     * Return the maximum context length.
     * @return the length of the longest preceding context.
     */
    public int getMaximumContextLength() {
        return maxContextLength;
    }

    /**
     * Add a rule to this set.  Rules are added in order, and order is
     * significant.
     *
     * <p>Adding a rule discards the rule table built by a previous
     * {@link #freeze} call (the index table itself is left untouched), so
     * freeze() has to be called again before {@link #transliterate}.
     *
     * @param rule the rule to add
     */
    public void addRule(TransliterationRule rule) {
        ruleVector.add(rule);
        int len = rule.getAnteContextLength();
        if (len > maxContextLength) {
            maxContextLength = len;
        }

        // The sorted table no longer reflects ruleVector.
        rules = null;
    }

    /**
     * Close this rule set to further additions, check it for masked rules,
     * and index it to optimize performance.
     *
     * <p>Every masking pair is listed in the exception message, one per line,
     * in the order first encountered.  A pair that shows up in several bins
     * is listed only once.
     *
     * @exception IllegalArgumentException if some rules are masked
     */
    public void freeze() {
        /* Construct the rule array and index table.  We reorder the
         * rules by sorting them into 256 bins.  Each bin contains all
         * rules matching the index value for that bin.  A rule
         * matches an index value if string whose first key character
         * has a low byte equal to the index value can match the rule.
         *
         * Each bin contains zero or more rules, in the same order
         * they were found originally.  However, the total rules in
         * the bins may exceed the number in the original vector,
         * since rules that have a variable as their first key
         * character will generally fall into more than one bin.
         *
         * That is, each bin contains all rules that either have that
         * first index value as their first key character, or have
         * a set containing the index value as their first character.
         */
        int n = ruleVector.size();
        index = new int[257]; // 256 bin starts plus one trailing end marker
        List<TransliterationRule> v = new ArrayList<TransliterationRule>(2*n); // heuristic; adjust as needed

        /* Precompute the index values.  This saves a LOT of time.
         */
        int[] indexValue = new int[n];
        for (int j=0; j<n; ++j) {
            indexValue[j] = ruleVector.get(j).getIndexValue();
        }
        for (int x=0; x<256; ++x) {
            index[x] = v.size();
            for (int j=0; j<n; ++j) {
                if (indexValue[j] >= 0) {
                    if (indexValue[j] == x) {
                        v.add(ruleVector.get(j));
                    }
                } else {
                    // If the indexValue is < 0, then the first key character is
                    // a set, and we must use the more time-consuming
                    // matchesIndexValue check.  In practice this happens
                    // rarely, so we seldom tread this code path.
                    TransliterationRule r = ruleVector.get(j);
                    if (r.matchesIndexValue(x)) {
                        v.add(r);
                    }
                }
            }
        }
        index[256] = v.size();

        /* Freeze things into an array.
         */
        rules = new TransliterationRule[v.size()];
        v.toArray(rules);

        /* Check for masking.  This is MUCH faster than checking each rule
         * against each following rule, since we only have to check for
         * masking within each bin.  It's 256*O(n2^2) instead of O(n1^2),
         * where n1 is the total rule count, and n2 is the per-bin rule
         * count.  But n2<<n1, so it's a big win.
         *
         * A rule that sits in several bins is compared against its
         * neighbours once per bin, so the same complaint can be produced
         * repeatedly.  The insertion-ordered set keeps only the first copy.
         */
        Set<String> maskErrors = new LinkedHashSet<String>();
        for (int x=0; x<256; ++x) {
            int binEnd = index[x+1];
            for (int j=index[x]; j<binEnd-1; ++j) {
                TransliterationRule r1 = rules[j];
                for (int k=j+1; k<binEnd; ++k) {
                    TransliterationRule r2 = rules[k];
                    if (r1.masks(r2)) {
                        maskErrors.add("Rule " + r1 + " masks " + r2);
                    }
                }
            }
        }

        if (!maskErrors.isEmpty()) {
            StringBuilder errors = new StringBuilder();
            for (String message : maskErrors) {
                // Messages always start with "Rule ", so a non-empty buffer
                // means at least one message has been written already.
                if (errors.length() != 0) {
                    errors.append("\n");
                }
                errors.append(message);
            }
            throw new IllegalArgumentException(errors.toString());
        }
    }

    /**
     * Transliterate the given text with the given position indices.
     * Return true if the transliteration should continue, or false if it
     * should halt (because of a U_PARTIAL_MATCH match).
     *
     * <p>The rules in the bin selected by the low byte of the code point at
     * <code>pos.start</code> are tried in order.  The first rule reporting
     * U_MATCH or U_PARTIAL_MATCH ends the search.  If no rule does,
     * <code>pos.start</code> is advanced past that code point.
     *
     * <p>{@link #freeze} must have been called since the last
     * {@link #addRule}; this method reads the tables that freeze() builds.
     *
     * @param text the text to be transliterated
     * @param pos the position indices, which will be updated
     * @param incremental if true, assume new text may be inserted
     * at index.limit, and return false if there is a partial match.
     * @return true unless a U_PARTIAL_MATCH has been obtained,
     * indicating that transliteration should stop until more text
     * arrives.
     */
    public boolean transliterate(Replaceable text,
                                 Transliterator.Position pos,
                                 boolean incremental) {
        int indexByte = text.char32At(pos.start) & 0xFF;
        for (int i=index[indexByte]; i<index[indexByte+1]; ++i) {
            int m = rules[i].matchAndReplace(text, pos, incremental);
            switch (m) {
            case UnicodeMatcher.U_MATCH:
                if (Transliterator.DEBUG) {
                    System.out.println((incremental ? "Rule.i: match ":"Rule: match ") +
                                       rules[i].toRule(true) + " => " +
                                       UtilityExtensions.formatInput(text, pos));
                }
                return true;
            case UnicodeMatcher.U_PARTIAL_MATCH:
                if (Transliterator.DEBUG) {
                    System.out.println((incremental ? "Rule.i: partial match ":"Rule: partial match ") +
                                       rules[i].toRule(true) + " => " +
                                       UtilityExtensions.formatInput(text, pos));
                }
                return false;
            default:
                // Any other result: try the next rule in the bin.
                if (Transliterator.DEBUG) {
                    System.out.println("Rule: no match " + rules[i]);
                }
            }
        }
        // No match or partial match from any rule
        pos.start += UTF16.getCharCount(text.char32At(pos.start));
        if (Transliterator.DEBUG) {
            System.out.println((incremental ? "Rule.i: no match => ":"Rule: no match => ") +
                               UtilityExtensions.formatInput(text, pos));
        }
        return true;
    }

    /**
     * Create rule strings that represents this rule set.
     *
     * @param escapeUnprintable passed through to each rule's
     * <code>toRule</code> call
     * @return the source of every rule, in the order added, separated by
     * newline characters; the empty string if there are no rules
     */
    String toRules(boolean escapeUnprintable) {
        int count = ruleVector.size();
        StringBuilder ruleSource = new StringBuilder();
        for (int i=0; i<count; ++i) {
            if (i != 0) {
                ruleSource.append('\n');
            }
            TransliterationRule r = ruleVector.get(i);
            ruleSource.append(r.toRule(escapeUnprintable));
        }
        return ruleSource.toString();
    }

    /**
     * Ask every rule, in the order added, to contribute to the given source
     * and target sets.  The caller's filter is not modified: a private copy
     * is handed to the rules, and after each rule that copy is widened by
     * whatever the rule placed in its (freshly cleared) revisiting set.
     *
     * @param filter the initial filter; copied, never modified here
     * @param sourceSet passed to each rule's <code>addSourceTargetSet</code>
     * @param targetSet passed to each rule's <code>addSourceTargetSet</code>
     */
    // TODO Handle the case where we have :: [a] ; a > |b ; b > c ;
    // TODO Merge into r.addSourceTargetSet, to avoid duplicate testing
    void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
        UnicodeSet currentFilter = new UnicodeSet(filter);
        UnicodeSet revisiting = new UnicodeSet();
        int count = ruleVector.size();
        for (int i=0; i<count; ++i) {
            TransliterationRule r = ruleVector.get(i);
            r.addSourceTargetSet(currentFilter, sourceSet, targetSet, revisiting.clear());
            currentFilter.addAll(revisiting);
        }
    }

}