17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2009-2014, International Business Machines Corporation and * 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* others. All Rights Reserved. * 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.text; 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashMap; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.ICUDebug; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.CollationElementIterator; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RbnfLenientScanner; 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RbnfLenientScannerProvider; 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RuleBasedCollator; 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * implementation behind setLenientParseMode, which is based on Collator. 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert@Deprecated 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class RbnfScannerProviderImpl implements RbnfLenientScannerProvider { 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final boolean DEBUG = ICUDebug.enabled("rbnf"); 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Map<String, RbnfLenientScanner> cache; 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Deprecated 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public RbnfScannerProviderImpl() { 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cache = new HashMap<String, RbnfLenientScanner>(); 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a collation-based scanner. 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Only primary differences are treated as significant. This means that case 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * differences, accent differences, alternate spellings of the same letter 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * matching the text. In many cases, numerals will be accepted in place of words 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or phrases as well. 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For example, all of the following will correctly parse as 255 in English in 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * lenient-parse mode: 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <br>"two hundred fifty-five" 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <br>"two hundred fifty five" 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <br>"TWO HUNDRED FIFTY-FIVE" 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <br>"twohundredfiftyfive" 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <br>"2 hundred fifty-5" 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The Collator used is determined by the locale that was 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * passed to this object on construction. The description passed to this object 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * on construction may supply additional collation rules that are appended to the 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * end of the default collator for the locale, enabling additional equivalences 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (such as adding more ignorable characters or permitting spelled-out version of 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * symbols; see the demo program for examples). 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * It's important to emphasize that even strict parsing is relatively lenient: it 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * will accept some text that it won't produce as output. In English, for example, 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * it will correctly parse "two hundred zero" and "fifteen hundred". 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Deprecated 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public RbnfLenientScanner get(ULocale locale, String extras) { 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RbnfLenientScanner result = null; 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String key = locale.toString() + "/" + extras; 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert synchronized(cache) { 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result = cache.get(key); 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (result != null) { 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result = createScanner(locale, extras); 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert synchronized(cache) { 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cache.put(key, result); 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Deprecated 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected RbnfLenientScanner createScanner(ULocale locale, String extras) { 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator collator = null; 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // create a default collator based on the locale, 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // then pull out that collator's rules, append any additional 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // rules specified in the description, and create a _new_ 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // collator based on the combination of those rules 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale()); 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (extras != null) { 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String rules = collator.getRules() + extras; 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator = new RuleBasedCollator(rules); 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert catch (Exception e) { 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If we get here, it means we have a malformed set of 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // collation rules, which hopefully won't happen 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ///CLOVER:OFF 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG){ // debug hook 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert e.printStackTrace(); System.out.println("++++"); 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator = null; 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ///CLOVER:ON 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new RbnfLenientScannerImpl(collator); 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static class RbnfLenientScannerImpl implements RbnfLenientScanner { 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private final RuleBasedCollator collator; 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private RbnfLenientScannerImpl(RuleBasedCollator rbc) { 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.collator = rbc; 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean allIgnorable(String s) { 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator iter = collator.getCollationElementIterator(s); 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int o = iter.next(); 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (o != CollationElementIterator.NULLORDER 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert && CollationElementIterator.primaryOrder(o) == 0) { 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert o = iter.next(); 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return o == CollationElementIterator.NULLORDER; 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int[] findText(String str, String key, int startingAt) { 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int p = startingAt; 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int keyLen = 0; 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // basically just isolate smaller and smaller substrings of 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the target string (each running to the end of the string, 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and with the first one running from startingAt to the end) 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and then use prefixLength() to see if the search key is at 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the beginning of each substring. This is excruciatingly 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // slow, but it will locate the key and tell use how long the 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // matching text was. 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (p < str.length() && keyLen == 0) { 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert keyLen = prefixLength(str.substring(p), key); 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (keyLen != 0) { 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new int[] { p, keyLen }; 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++p; 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if we make it to here, we didn't find it. Return -1 for the 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // location. The length should be ignored, but set it to 0, 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // which should be "safe" 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new int[] { -1, 0 }; 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ///CLOVER:OFF 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The following method contains the same signature as findText 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and has never been used by anything once. 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @SuppressWarnings("unused") 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int[] findText2(String str, String key, int startingAt) { 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator strIter = collator.getCollationElementIterator(str); 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator keyIter = collator.getCollationElementIterator(key); 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int keyStart = -1; 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strIter.setOffset(startingAt); 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int oStr = strIter.next(); 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int oKey = keyIter.next(); 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (oKey != CollationElementIterator.NULLORDER) { 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (oStr != CollationElementIterator.NULLORDER && 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator.primaryOrder(oStr) == 0) 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oStr = strIter.next(); 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (oKey != CollationElementIterator.NULLORDER && 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator.primaryOrder(oKey) == 0) 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oKey = keyIter.next(); 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (oStr == CollationElementIterator.NULLORDER) { 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new int[] { -1, 0 }; 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (oKey == CollationElementIterator.NULLORDER) { 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (CollationElementIterator.primaryOrder(oStr) == 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator.primaryOrder(oKey)) { 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert keyStart = strIter.getOffset(); 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oStr = strIter.next(); 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oKey = keyIter.next(); 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (keyStart != -1) { 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert keyStart = -1; 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert keyIter.reset(); 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oStr = strIter.next(); 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (oKey == CollationElementIterator.NULLORDER) { 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new int[] { keyStart, strIter.getOffset() - keyStart }; 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new int[] { -1, 0 }; 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ///CLOVER:ON 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int prefixLength(String str, String prefix) { 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Create two collation element iterators, one over the target string 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and another over the prefix. 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Previous code was matching "fifty-" against " fifty" and leaving 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the number " fifty-7" to parse as 43 (50 - 7). 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Also it seems that if we consume the entire prefix, that's ok even 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if we've consumed the entire string, so I switched the logic to 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // reflect this. 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator strIter = collator.getCollationElementIterator(str); 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix); 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // match collation elements between the strings 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int oStr = strIter.next(); 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int oPrefix = prefixIter.next(); 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (oPrefix != CollationElementIterator.NULLORDER) { 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // skip over ignorable characters in the target string 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr != 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator.NULLORDER) { 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oStr = strIter.next(); 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // skip over ignorable characters in the prefix 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix != 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator.NULLORDER) { 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oPrefix = prefixIter.next(); 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if skipping over ignorables brought to the end of 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the prefix, we DID match: drop out of the loop 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (oPrefix == CollationElementIterator.NULLORDER) { 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if skipping over ignorables brought us to the end 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // of the target string, we didn't match and return 0 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (oStr == CollationElementIterator.NULLORDER) { 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // match collation elements from the two strings 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (considering only primary differences). If we 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // get a mismatch, dump out and return 0 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (CollationElementIterator.primaryOrder(oStr) != 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator.primaryOrder(oPrefix)) { 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // otherwise, advance to the next character in each string 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and loop (we drop out of the loop when we exhaust 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // collation elements in the prefix) 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oStr = strIter.next(); 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oPrefix = prefixIter.next(); 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int result = strIter.getOffset(); 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (oStr != CollationElementIterator.NULLORDER) { 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --result; 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 285