// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.util.ArrayList;
import java.util.List;

import com.ibm.icu.impl.UtilityExtensions;

/**
 * An ordered collection of {@link TransliterationRule}s describing
 * transliteration in a single direction. A <code>RuleBasedTransliterator</code>
 * holds up to two of these: one for the forward direction and one for the
 * reverse direction.
 *
 * <p>Rules are accumulated with {@link #addRule}, after which {@link #freeze}
 * sorts them into an indexed table. {@link #transliterate} then uses that
 * table to try only the rules that are candidates for the character at the
 * current position.
 *
 * <p>Copyright © IBM Corporation 1999. All rights reserved.
 *
 * @author Alan Liu
 */
class TransliterationRuleSet {
    /**
     * The rules in the order in which they were added.
     */
    private List<TransliterationRule> ruleVector;

    /**
     * The largest ante-context length reported by any rule added so far.
     */
    private int maxContextLength;

    /**
     * Binned rule table built by freeze() from ruleVector. Entries are the
     * same objects that ruleVector holds; a rule appears once for every bin
     * it was placed in. addRule() resets this to null.
     */
    private TransliterationRule[] rules;

    /**
     * Bin boundaries into rules[], built by freeze(). For a character c,
     * let x = c &amp; 0xFF; the candidate rules are
     * rules[index[x]] .. rules[index[x+1]-1]. The array has 257 entries so
     * that index[x+1] exists for x == 255.
     */
    private int[] index;

    /**
     * Creates a rule set containing no rules.
     */
    public TransliterationRuleSet() {
        ruleVector = new ArrayList<TransliterationRule>();
        maxContextLength = 0;
    }

    /**
     * Returns the maximum context length.
     * @return the largest ante-context length among the rules added so far,
     * or 0 if no rule has been added.
     */
    public int getMaximumContextLength() {
        return maxContextLength;
    }

    /**
     * Appends a rule to this set. Rules are kept in insertion order, and
     * that order is preserved within each bin built by freeze().
     * Adding a rule discards the table built by a previous freeze().
     * @param rule the rule to add
     */
    public void addRule(TransliterationRule rule) {
        ruleVector.add(rule);
        maxContextLength = Math.max(maxContextLength, rule.getAnteContextLength());

        // Any previously frozen table no longer reflects ruleVector.
        rules = null;
    }

    /**
     * Builds the binned rule table and its index from the rules added so
     * far, then checks every bin for masked rules.
     *
     * <p>The rules are distributed over 256 bins. A rule with a
     * non-negative index value goes into the single bin with that number;
     * a rule with a negative index value (its first key character is a
     * set, per the original notes) goes into every bin for which
     * <code>matchesIndexValue</code> returns true. Within a bin the rules
     * keep their insertion order, and the table can be longer than the
     * list of added rules because one rule may sit in several bins.
     *
     * @exception IllegalArgumentException if a rule masks a later rule in
     * the same bin; the message lists every such pair, one per line
     */
    public void freeze() {
        final int ruleCount = ruleVector.size();
        index = new int[257]; // 256 bins plus one trailing end marker
        List<TransliterationRule> binned =
            new ArrayList<TransliterationRule>(2 * ruleCount); // capacity is only a guess

        // Fetch each rule's index value once up front rather than once per bin.
        int[] keys = new int[ruleCount];
        for (int i = 0; i < ruleCount; ++i) {
            keys[i] = ruleVector.get(i).getIndexValue();
        }

        for (int bin = 0; bin < 256; ++bin) {
            index[bin] = binned.size();
            for (int i = 0; i < ruleCount; ++i) {
                TransliterationRule candidate = ruleVector.get(i);
                // Non-negative key: a plain comparison is enough. Negative
                // key: fall back to the slower matchesIndexValue() test.
                boolean belongs = (keys[i] >= 0)
                        ? (keys[i] == bin)
                        : candidate.matchesIndexValue(bin);
                if (belongs) {
                    binned.add(candidate);
                }
            }
        }
        index[256] = binned.size();

        // Snapshot the binned list into the array used for lookups.
        rules = binned.toArray(new TransliterationRule[binned.size()]);

        // Masking only needs to be checked between rules sharing a bin,
        // which is far cheaper than comparing every rule with every
        // following rule.
        StringBuilder errors = null;
        for (int bin = 0; bin < 256; ++bin) {
            final int binEnd = index[bin + 1];
            for (int first = index[bin]; first < binEnd - 1; ++first) {
                TransliterationRule earlier = rules[first];
                for (int second = first + 1; second < binEnd; ++second) {
                    TransliterationRule later = rules[second];
                    if (!earlier.masks(later)) {
                        continue;
                    }
                    if (errors == null) {
                        errors = new StringBuilder();
                    } else {
                        errors.append("\n");
                    }
                    errors.append("Rule ").append(earlier).append(" masks ").append(later);
                }
            }
        }

        if (errors != null) {
            throw new IllegalArgumentException(errors.toString());
        }
    }

    /**
     * Tries the candidate rules for the character at <code>pos.start</code>,
     * in table order, and stops at the first rule that reports a match or
     * a partial match. If no rule does, <code>pos.start</code> is advanced
     * past the current code point.
     *
     * <p>freeze() must have been called since the last addRule(), because
     * this method reads the table that freeze() builds.
     *
     * @param text the text to be transliterated
     * @param pos the position indices, which will be updated
     * @param incremental passed through to each rule; the original notes
     * describe it as "assume new text may be inserted at the limit, and
     * report a partial match instead of failing"
     * @return false if a rule reported U_PARTIAL_MATCH, meaning the caller
     * should stop until more text arrives; true otherwise. The original
     * notes state that false is only returned when incremental is true.
     */
    public boolean transliterate(Replaceable text,
                                 Transliterator.Position pos,
                                 boolean incremental) {
        final int bin = text.char32At(pos.start) & 0xFF;
        for (int i = index[bin]; i < index[bin + 1]; ++i) {
            final int outcome = rules[i].matchAndReplace(text, pos, incremental);
            if (outcome == UnicodeMatcher.U_MATCH) {
                if (Transliterator.DEBUG) {
                    String label = incremental ? "Rule.i: match " : "Rule: match ";
                    System.out.println(label
                            + rules[i].toRule(true) + " => "
                            + UtilityExtensions.formatInput(text, pos));
                }
                return true;
            } else if (outcome == UnicodeMatcher.U_PARTIAL_MATCH) {
                if (Transliterator.DEBUG) {
                    String label = incremental ? "Rule.i: partial match " : "Rule: partial match ";
                    System.out.println(label
                            + rules[i].toRule(true) + " => "
                            + UtilityExtensions.formatInput(text, pos));
                }
                return false;
            } else if (Transliterator.DEBUG) {
                System.out.println("Rule: no match " + rules[i]);
            }
        }

        // Nothing matched, fully or partially: step over this code point.
        pos.start += UTF16.getCharCount(text.char32At(pos.start));
        if (Transliterator.DEBUG) {
            String label = incremental ? "Rule.i: no match => " : "Rule: no match => ";
            System.out.println(label + UtilityExtensions.formatInput(text, pos));
        }
        return true;
    }

    /**
     * Builds the rule source for this set: the result of
     * <code>toRule(escapeUnprintable)</code> for every rule, in insertion
     * order, separated by newline characters.
     */
    String toRules(boolean escapeUnprintable) {
        StringBuilder ruleSource = new StringBuilder();
        boolean needSeparator = false;
        for (TransliterationRule rule : ruleVector) {
            if (needSeparator) {
                ruleSource.append('\n');
            }
            ruleSource.append(rule.toRule(escapeUnprintable));
            needSeparator = true;
        }
        return ruleSource.toString();
    }

    // TODO Handle the case where we have :: [a] ; a > |b ; b > c ;
    // TODO Merge into r.addSourceTargetSet, to avoid duplicate testing
    /**
     * Asks every rule, in insertion order, to contribute to the given
     * source and target sets. Each rule is handed a working copy of the
     * filter plus an emptied scratch set; whatever the rule leaves in the
     * scratch set is added to the working filter before the next rule runs.
     * The caller's <code>filter</code> is copied, not modified here.
     */
    void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
        UnicodeSet workingFilter = new UnicodeSet(filter);
        UnicodeSet scratch = new UnicodeSet();
        for (TransliterationRule rule : ruleVector) {
            rule.addSourceTargetSet(workingFilter, sourceSet, targetSet, scratch.clear());
            workingFilter.addAll(scratch);
        }
    }

}