/*
 *******************************************************************************
 * Copyright (C) 2009-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.text;
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashMap;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.ICUDebug;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.CollationElementIterator;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RbnfLenientScanner;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RbnfLenientScannerProvider;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RuleBasedCollator;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * implementation behind setLenientParseMode, which is based on Collator.
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only.
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert@Deprecated
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean DEBUG = ICUDebug.enabled("rbnf");
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Map<String, RbnfLenientScanner> cache;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public RbnfScannerProviderImpl() {
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        cache = new HashMap<String, RbnfLenientScanner>();
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a collation-based scanner.
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Only primary differences are treated as significant.  This means that case
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * differences, accent differences, alternate spellings of the same letter
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * matching the text.  In many cases, numerals will be accepted in place of words
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or phrases as well.
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For example, all of the following will correctly parse as 255 in English in
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * lenient-parse mode:
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <br>"two hundred fifty-five"
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <br>"two hundred fifty five"
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <br>"TWO HUNDRED FIFTY-FIVE"
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <br>"twohundredfiftyfive"
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <br>"2 hundred fifty-5"
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The Collator used is determined by the locale that was
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * passed to this object on construction.  The description passed to this object
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * on construction may supply additional collation rules that are appended to the
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * end of the default collator for the locale, enabling additional equivalences
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (such as adding more ignorable characters or permitting spelled-out version of
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * symbols; see the demo program for examples).
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * It's important to emphasize that even strict parsing is relatively lenient: it
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * will accept some text that it won't produce as output.  In English, for example,
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * it will correctly parse "two hundred zero" and "fifteen hundred".
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public RbnfLenientScanner get(ULocale locale, String extras) {
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RbnfLenientScanner result = null;
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String key = locale.toString() + "/" + extras;
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        synchronized(cache) {
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = cache.get(key);
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (result != null) {
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return result;
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result = createScanner(locale, extras);
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        synchronized(cache) {
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cache.put(key, result);
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RuleBasedCollator collator = null;
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // create a default collator based on the locale,
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // then pull out that collator's rules, append any additional
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // rules specified in the description, and create a _new_
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // collator based on the combination of those rules
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (extras != null) {
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String rules = collator.getRules() + extras;
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                collator = new RuleBasedCollator(rules);
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        catch (Exception e) {
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // If we get here, it means we have a malformed set of
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // collation rules, which hopefully won't happen
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ///CLOVER:OFF
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (DEBUG){ // debug hook
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                e.printStackTrace(); System.out.println("++++");
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            collator = null;
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ///CLOVER:ON
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return new RbnfLenientScannerImpl(collator);
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final RuleBasedCollator collator;
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.collator = rbc;
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public boolean allIgnorable(String s) {
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationElementIterator iter = collator.getCollationElementIterator(s);
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int o = iter.next();
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (o != CollationElementIterator.NULLORDER
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                   && CollationElementIterator.primaryOrder(o) == 0) {
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                o = iter.next();
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return o == CollationElementIterator.NULLORDER;
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int[] findText(String str, String key, int startingAt) {
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int p = startingAt;
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int keyLen = 0;
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // basically just isolate smaller and smaller substrings of
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the target string (each running to the end of the string,
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and with the first one running from startingAt to the end)
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and then use prefixLength() to see if the search key is at
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the beginning of each substring.  This is excruciatingly
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // slow, but it will locate the key and tell use how long the
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // matching text was.
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (p < str.length() && keyLen == 0) {
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                keyLen = prefixLength(str.substring(p), key);
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (keyLen != 0) {
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return new int[] { p, keyLen };
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++p;
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if we make it to here, we didn't find it.  Return -1 for the
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // location.  The length should be ignored, but set it to 0,
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // which should be "safe"
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return new int[] { -1, 0 };
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ///CLOVER:OFF
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The following method contains the same signature as findText
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //  and has never been used by anything once.
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        @SuppressWarnings("unused")
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int[] findText2(String str, String key, int startingAt) {
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationElementIterator strIter = collator.getCollationElementIterator(str);
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationElementIterator keyIter = collator.getCollationElementIterator(key);
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int keyStart = -1;
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strIter.setOffset(startingAt);
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int oStr = strIter.next();
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int oKey = keyIter.next();
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (oKey != CollationElementIterator.NULLORDER) {
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (oStr != CollationElementIterator.NULLORDER &&
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       CollationElementIterator.primaryOrder(oStr) == 0)
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    oStr = strIter.next();
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (oKey != CollationElementIterator.NULLORDER &&
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       CollationElementIterator.primaryOrder(oKey) == 0)
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    oKey = keyIter.next();
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (oStr == CollationElementIterator.NULLORDER) {
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return new int[] { -1, 0 };
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (oKey == CollationElementIterator.NULLORDER) {
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (CollationElementIterator.primaryOrder(oStr) ==
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    CollationElementIterator.primaryOrder(oKey)) {
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    keyStart = strIter.getOffset();
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    oStr = strIter.next();
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    oKey = keyIter.next();
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (keyStart != -1) {
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        keyStart = -1;
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        keyIter.reset();
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        oStr = strIter.next();
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (oKey == CollationElementIterator.NULLORDER) {
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return new int[] { keyStart, strIter.getOffset() - keyStart };
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return new int[] { -1, 0 };
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ///CLOVER:ON
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int prefixLength(String str, String prefix) {
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Create two collation element iterators, one over the target string
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and another over the prefix.
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Previous code was matching "fifty-" against " fifty" and leaving
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the number " fifty-7" to parse as 43 (50 - 7).
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Also it seems that if we consume the entire prefix, that's ok even
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if we've consumed the entire string, so I switched the logic to
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // reflect this.
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationElementIterator strIter = collator.getCollationElementIterator(str);
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // match collation elements between the strings
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int oStr = strIter.next();
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int oPrefix = prefixIter.next();
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (oPrefix != CollationElementIterator.NULLORDER) {
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // skip over ignorable characters in the target string
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr !=
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       CollationElementIterator.NULLORDER) {
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    oStr = strIter.next();
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // skip over ignorable characters in the prefix
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix !=
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       CollationElementIterator.NULLORDER) {
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    oPrefix = prefixIter.next();
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // if skipping over ignorables brought to the end of
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the prefix, we DID match: drop out of the loop
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (oPrefix == CollationElementIterator.NULLORDER) {
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // if skipping over ignorables brought us to the end
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // of the target string, we didn't match and return 0
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (oStr == CollationElementIterator.NULLORDER) {
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0;
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // match collation elements from the two strings
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // (considering only primary differences).  If we
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // get a mismatch, dump out and return 0
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (CollationElementIterator.primaryOrder(oStr) !=
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    CollationElementIterator.primaryOrder(oPrefix)) {
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0;
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // otherwise, advance to the next character in each string
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // and loop (we drop out of the loop when we exhaust
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // collation elements in the prefix)
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                oStr = strIter.next();
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                oPrefix = prefixIter.next();
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int result = strIter.getOffset();
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (oStr != CollationElementIterator.NULLORDER) {
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                --result;
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
285