17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
3bee65486a185907111f3be60992433e133ec0e32Scott Russell * Copyright (C) 1996-2016, International Business Machines Corporation and
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.StringCharacterIterator;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Locale;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUException;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// Java porting note:
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        The ICU4C implementation contains dead code in many places.
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      While porting the ICU4C linear search implementation, this dead code
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      was not fully ported. The code blocks tagged by "// *** Boyer-Moore ***"
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      are those dead code blocks, still available in ICU4C.
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        The ICU4C implementation does not seem to handle UCharacterIterator pointing
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      to a fragment of text properly. ICU4J uses CharacterIterator to navigate through
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      the input text. We need to carefully review the code ported from ICU4C
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      assuming the start index is 0.
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        ICU4C implementation initializes pattern.CE and pattern.PCE. It looks like
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      CE is no longer used, except in a few places checking CELength. It looks like this
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      is a leftover from already-disabled Boyer-Moore search code. This Java implementation
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      preserves the code, but we should clean this up later.
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>StringSearch</tt> is a {@link SearchIterator} that provides
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * language-sensitive text searching based on the comparison rules defined
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * in a {@link RuleBasedCollator} object.
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * StringSearch ensures that language eccentricity can be
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * handled, e.g. for the German collator, characters &szlig; and SS will be matched
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * if case is chosen to be ignored.
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * "ICU Collation Design Document"</a> for more information.
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * There are 2 match options for selection:<br>
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Let S' be the sub-string of a text string S between the offsets start and
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * end [start, end].
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <br>
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A pattern string P matches a text string S at the offsets [start, end]
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * if
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * option 1. Some canonical equivalent of P matches some canonical equivalent
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *           of S'
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * option 2. P matches S' and if P starts or ends with a combining mark,
 *           there exists no non-ignorable combining mark before or after S'
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *           in S respectively.
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Option 2. is the default.
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This search has APIs similar to that of other text iteration mechanisms
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * such as the break iterators in {@link BreakIterator}. Using these
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * APIs, it is easy to scan through text looking for all occurrences of
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a given pattern. This search iterator allows changing of direction by
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * calling a {@link #reset} followed by a {@link #next} or {@link #previous}.
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Though a direction change can occur without calling {@link #reset} first,
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * this operation comes with some speed penalty.
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Match results in the forward direction will match the result matches in
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the backwards direction in the reverse order
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link SearchIterator} provides APIs to specify the starting position
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * within the text string to be searched, e.g. {@link SearchIterator#setIndex setIndex},
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link SearchIterator#preceding preceding} and {@link SearchIterator#following following}.
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Since the starting position will be set as it is specified, please take note that
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * there are some danger points at which the search may render incorrect
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * results:
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <ul>
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li> In the midst of a substring that requires normalization.
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li> If the following match is to be found, the position should not be the
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      second character which requires swapping with the preceding
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      character. Vice versa, if the preceding match is to be found, the
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      position to search from should not be the first character which
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      requires swapping with the next character. E.g certain Thai and
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      Lao characters require swapping.
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li> If a following pattern match is to be found, any position within a
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      contracting sequence except the first will fail. Vice versa if a
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      preceding pattern match is to be found, an invalid starting point
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      would be any character within a contracting sequence except the last.
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </ul>
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A {@link BreakIterator} can be used if only matches at logical breaks are desired.
 * Using a {@link BreakIterator} will only give you results that exactly match the
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * boundaries given by the {@link BreakIterator}. For instance the pattern "e" will
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * not be found in the string "\u00e9" if a character break iterator is used.
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Options are provided to handle overlapping matches.
 * E.g. In English, overlapping matches produce the result 0 and 2
 * for the pattern "abab" in the text "ababab", where mutually
 * exclusive matches only produce the result of 0.
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Options are also provided to implement "asymmetric search" as described in
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search">
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UTS #10 Unicode Collation Algorithm</a>, specifically the ElementComparisonType
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * values.
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Though collator attributes will be taken into consideration while
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * performing matches, there are no APIs here for setting and getting the
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * attributes. These attributes can be set by getting the collator
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * from {@link #getCollator} and using the APIs in {@link RuleBasedCollator}.
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Lastly to update <tt>StringSearch</tt> to the new collator attributes,
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #reset} has to be called.
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Restriction: <br>
 * Currently there are no composite characters that consist of a
113bee65486a185907111f3be60992433e133ec0e32Scott Russell * character with combining class &gt; 0 before a character with combining
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * class == 0. However, if such a character exists in the future,
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>StringSearch</tt> does not guarantee the results for option 1.
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Consult the {@link SearchIterator} documentation for information on
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and examples of how to use instances of this class to implement text
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * searching.
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Note, <tt>StringSearch</tt> is not to be subclassed.
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </p>
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see SearchIterator
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see RuleBasedCollator
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Laura Werner, synwee
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// internal notes: all methods do not guarantee the correct status of the
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// characteriterator. the caller has to maintain the original index position
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// if necessary. methods could change the index position as it deems fit
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class StringSearch extends SearchIterator {
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Pattern pattern_;
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private RuleBasedCollator collator_;
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    // positions within the collation element iterator are used to determine
    // if we are at the start of the text.
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationElementIterator textIter_;
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationPCE textProcessedIter_;
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // utility collation element, used throughout program for temporary
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // iteration.
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationElementIterator utilIter_;
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
145bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private Normalizer2 nfd_;
146bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int strength_;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int ceMask_;
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int variableTop_;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean toShift_;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // *** Boyer-Moore ***
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private char[] canonicalPrefixAccents_;
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private char[] canonicalSuffixAccents_;
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules defined in
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the argument collator to search for argument pattern in the argument
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * target text. The argument <code>breakiter</code> is used to define logical matches.
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See super class documentation for more details on the use of the target
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * text and {@link BreakIterator}.
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param collator {@link RuleBasedCollator} that defines the language rules
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param breakiter A {@link BreakIterator} that is used to determine the
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                boundaries of a logical match. This argument can be null.
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when argument target is null,
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            or of length 0
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see BreakIterator
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see RuleBasedCollator
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, CharacterIterator target, RuleBasedCollator collator,
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BreakIterator breakiter) {
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // This implementation is ported from ICU4C usearch_open()
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(target, breakiter);
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // string search does not really work when numeric collation is turned on
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (collator.getNumericCollation()) {
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new UnsupportedOperationException("Numeric collation is not supported by StringSearch");
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        collator_ = collator;
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        strength_ = collator.getStrength();
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ceMask_ = getMask(strength_);
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        toShift_ = collator.isAlternateHandlingShifted();
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        variableTop_ = collator.getVariableTop();
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
192bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        nfd_ = Normalizer2.getNFDInstance();
193bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_ = new Pattern(pattern);
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(0);
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.matchedIndex_ = DONE;
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        utilIter_ = null;
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_ = new CollationElementIterator(target, collator);
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textProcessedIter_ = null;
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // This is done by super class constructor
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isOverlap_ = false;
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isCanonicalMatch_ = false;
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isForwardSearching_ = true;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = true;
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ULocale collLocale = collator.getLocale(ULocale.VALID_LOCALE);
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.internalBreakIter_ = BreakIterator.getCharacterInstance(collLocale == null ? ULocale.ROOT : collLocale);
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.internalBreakIter_.setText((CharacterIterator)target.clone());  // We need to create a clone
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        initialize();
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules defined in
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the argument collator to search for argument pattern in the argument
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * target text. No {@link BreakIterator}s are set to test for logical matches.
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param collator {@link RuleBasedCollator} that defines the language rules
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when argument target is null,
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            or of length 0
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see RuleBasedCollator
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, CharacterIterator target, RuleBasedCollator collator) {
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this(pattern, target, collator, null);
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules and
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break iterator rules defined in the argument locale to search for
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * argument pattern in the argument target text.
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param locale locale to use for language and break iterator rules
     * @throws IllegalArgumentException thrown when argument target is null,
     *            or of length 0.
     * @throws ClassCastException thrown if the collator for
     *            the specified locale is not a RuleBasedCollator.
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, CharacterIterator target, Locale locale) {
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this(pattern, target, ULocale.forLocale(locale));
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules and
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break iterator rules defined in the argument locale to search for
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * argument pattern in the argument target text.
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See super class documentation for more details on the use of the target
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * text and {@link BreakIterator}.
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param locale locale to use for language and break iterator rules
     * @throws IllegalArgumentException thrown when argument target is null,
     *            or of length 0.
     * @throws ClassCastException thrown if the collator for
     *            the specified locale is not a RuleBasedCollator.
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see BreakIterator
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see RuleBasedCollator
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see SearchIterator
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.2
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, CharacterIterator target, ULocale locale) {
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this(pattern, target, (RuleBasedCollator) Collator.getInstance(locale), null);
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules and
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break iterator rules defined in the default locale to search for
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * argument pattern in the argument target text.
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
     * @throws IllegalArgumentException thrown when argument target is null,
     *            or of length 0.
     * @throws ClassCastException thrown if the collator for
     *            the default locale is not a RuleBasedCollator.
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, String target) {
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this(pattern, new StringCharacterIterator(target),
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (RuleBasedCollator) Collator.getInstance(), null);
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets the {@link RuleBasedCollator} used for the language rules.
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Since <tt>StringSearch</tt> depends on the returned {@link RuleBasedCollator}, any
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * changes to the {@link RuleBasedCollator} result should follow with a call to
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * either {@link #reset()} or {@link #setCollator(RuleBasedCollator)} to ensure the correct
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * search behavior.
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </p>
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return {@link RuleBasedCollator} used by this <tt>StringSearch</tt>
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see RuleBasedCollator
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setCollator
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public RuleBasedCollator getCollator() {
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return collator_;
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the {@link RuleBasedCollator} to be used for language-specific searching.
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator's position will not be changed by this method.
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param collator to use for this <tt>StringSearch</tt>
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when collator is null
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getCollator
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setCollator(RuleBasedCollator collator) {
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (collator == null) {
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("Collator can not be null");
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        collator_ = collator;
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ceMask_ = getMask(collator_.getStrength());
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ULocale collLocale = collator.getLocale(ULocale.VALID_LOCALE);
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.internalBreakIter_ = BreakIterator.getCharacterInstance(collLocale == null ? ULocale.ROOT : collLocale);
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.internalBreakIter_.setText((CharacterIterator)search_.text().clone());  // We need to create a clone
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        toShift_ = collator.isAlternateHandlingShifted();
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        variableTop_ = collator.getVariableTop();
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_ = new CollationElementIterator(pattern_.text_, collator);
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        utilIter_ = new CollationElementIterator(pattern_.text_, collator);
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // initialize() _after_ setting the iterators for the new collator.
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        initialize();
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the pattern for which <tt>StringSearch</tt> is searching for.
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the pattern searched for
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String getPattern() {
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return pattern_.text_;
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the pattern to search for.
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator's position will not be changed by this method.
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern for searching
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getPattern
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IllegalArgumentException thrown if pattern is null or of
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *               length 0
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setPattern(String pattern) {
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern == null || pattern.length() <= 0) {
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException(
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "Pattern to search for can not be null or of length 0");
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.text_ = pattern;
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        initialize();
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Determines whether canonical matches (option 1, as described in the
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * class documentation) is set.
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See setCanonical(boolean) for more information.
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setCanonical
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if canonical matches is set, false otherwise
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //TODO: hoist this to SearchIterator
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isCanonical() {
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.isCanonicalMatch_;
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the canonical match mode. See class documentation for details.
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The default setting for this property is false.
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param allowCanonical flag indicator if canonical matches are allowed
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #isCanonical
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //TODO: hoist this to SearchIterator
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setCanonical(boolean allowCanonical) {
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isCanonicalMatch_ = allowCanonical;
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setTarget(CharacterIterator text) {
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super.setTarget(text);
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_.setText(text);
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getIndex() {
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = textIter_.getOffset();
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (isOutOfBounds(search_.beginIndex(), search_.endIndex(), result)) {
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return DONE;
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setIndex(int position) {
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Java porting note: This method is equivalent to setOffset() in ICU4C.
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ICU4C SearchIterator::setOffset() is a pure virtual method, while
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ICU4J SearchIterator.setIndex() is not abstract method.
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super.setIndex(position);
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_.setOffset(position);
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void reset() {
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // reset is setting the attributes that are already in
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // string search, hence all attributes in the collator should
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // be retrieved without any problems
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean sameCollAttribute = true;
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ceMask;
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean shift;
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int varTop;
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // **** hack to deal w/ how processed CEs encode quaternary ****
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int newStrength = collator_.getStrength();
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((strength_ < Collator.QUATERNARY && newStrength >= Collator.QUATERNARY)
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || (strength_ >= Collator.QUATERNARY && newStrength < Collator.QUATERNARY)) {
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sameCollAttribute = false;
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        strength_ = collator_.getStrength();
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ceMask = getMask(strength_);
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (ceMask_ != ceMask) {
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ceMask_ = ceMask;
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sameCollAttribute = false;
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        shift = collator_.isAlternateHandlingShifted();
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (toShift_ != shift) {
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            toShift_ = shift;
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sameCollAttribute = false;
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        varTop = collator_.getVariableTop();
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (variableTop_ != varTop) {
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            variableTop_ = varTop;
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sameCollAttribute = false;
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (!sameCollAttribute) {
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            initialize();
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_.setText(search_.text());
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(0);
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.matchedIndex_ = DONE;
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isOverlap_ = false;
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isCanonicalMatch_ = false;
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isForwardSearching_ = true;
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = true;
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected int handleNext(int position) {
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ == 0) {
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.matchedIndex_ = search_.matchedIndex_ == DONE ?
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    getIndex() : search_.matchedIndex_ + 1;
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.setMatchedLength(0);
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(search_.matchedIndex_);
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ == search_.endIndex()) {
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.matchedIndex_ = DONE;
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedLength() <= 0) {
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the flipping direction issue has already been handled
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // in next()
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // for boundary check purposes. this will ensure that the
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // next match will not preceed the current offset
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // note search_.matchedIndex_ will always be set to something
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // in the code
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.matchedIndex_ = position - 1;
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(position);
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // ICU4C comment:
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if strsrch_->breakIter is always the same as m_breakiterator_
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // then we don't need to check the match boundaries here because
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // usearch_handleNextXXX will already have done it.
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.isCanonicalMatch_) {
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // *could* actually use exact here 'cause no extra accents allowed...
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                handleNextCanonical();
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                handleNextExact();
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ == DONE) {
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textIter_.setOffset(search_.endIndex());
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textIter_.setOffset(search_.matchedIndex_);
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return search_.matchedIndex_;
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return DONE;
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected int handlePrevious(int position) {
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ == 0) {
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.matchedIndex_ =
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    search_.matchedIndex_ == DONE ? getIndex() : search_.matchedIndex_;
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ == search_.beginIndex()) {
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setMatchNotFound();
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.matchedIndex_--;
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textIter_.setOffset(search_.matchedIndex_);
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.setMatchedLength(0);
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(position);
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.isCanonicalMatch_) {
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // *could* use exact match here since extra accents *not* allowed!
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                handlePreviousCanonical();
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                handlePreviousExact();
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.matchedIndex_;
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ------------------ Internal implementation code ---------------------------
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int INITIAL_ARRAY_SIZE_ = 256;
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // *** Boyer-Moore ***
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private static final Normalizer2Impl nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private static final int LAST_BYTE_MASK_ = 0xff;
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private static final int SECOND_LAST_BYTE_SHIFT_ = 8;
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int PRIMARYORDERMASK = 0xffff0000;
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int SECONDARYORDERMASK = 0x0000ff00;
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int TERTIARYORDERMASK = 0x000000ff;
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Getting the mask for collation strength
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param strength collation strength
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return collation element mask
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int getMask(int strength) {
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        switch (strength) {
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case Collator.PRIMARY:
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return PRIMARYORDERMASK;
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case Collator.SECONDARY:
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return SECONDARYORDERMASK | PRIMARYORDERMASK;
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        default:
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return TERTIARYORDERMASK | SECONDARYORDERMASK | PRIMARYORDERMASK;
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // *** Boyer-Moore ***
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final char getFCD(String str, int offset) {
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char ch = str.charAt(offset);
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (ch < 0x180) {
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (char) nfcImpl_.getFCD16FromBelow180(ch);
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (nfcImpl_.singleLeadMightHaveNonZeroFCD16(ch)) {
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!Character.isHighSurrogate(ch)) {
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return (char) nfcImpl_.getFCD16FromNormData(ch);
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char c2;
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (++offset < str.length() && Character.isLowSurrogate(c2 = str.charAt(offset))) {
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return (char) nfcImpl_.getFCD16FromNormData(Character.toCodePoint(ch, c2));
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 0;
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final char getFCD(int c) {
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (char)nfcImpl_.getFCD16(c);
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Getting the modified collation elements taking into account the collation
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * attributes.
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sourcece
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the modified collation element
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int getCE(int sourcece) {
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // note for tertiary we can't use the collator->tertiaryMask, that
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // is a preprocessed mask that takes into account case options. since
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we are only concerned with exact matches, we don't need that.
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sourcece &= ceMask_;
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (toShift_) {
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // alternate handling here, since only the 16 most significant digits
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // is only used, we can safely do a compare without masking
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if the ce is a variable, we mask and get only the primary values
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // no shifting to quartenary is required since all primary values
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // less than variabletop will need to be masked off anyway.
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (variableTop_ > sourcece) {
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (strength_ >= Collator.QUATERNARY) {
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sourcece &= PRIMARYORDERMASK;
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sourcece = CollationElementIterator.IGNORABLE;
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (strength_ >= Collator.QUATERNARY && sourcece == CollationElementIterator.IGNORABLE) {
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sourcece = 0xFFFF;
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return sourcece;
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Direct port of ICU4C static int32_t * addTouint32_tArray(...) in usearch.cpp.
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is used for appending a PCE to Pattern.PCE_ buffer. We probably should
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * implement this in Pattern class.
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destination target array
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset destination offset to add value
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destinationlength target array size
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param value to be added
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param increments incremental size expected
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return new destination array, destination if there was no new allocation
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int[] addToIntArray(int[] destination, int offset, int destinationlength,
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int value, int increments) {
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int newlength = destinationlength;
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset + 1 == newlength) {
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newlength += increments;
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int temp[] = new int[newlength];
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.arraycopy(destination, 0, temp, 0, offset);
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            destination = temp;
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        destination[offset] = value;
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return destination;
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Direct port of ICU4C static int64_t * addTouint64_tArray(...) in usearch.cpp.
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is used for appending a PCE to Pattern.PCE_ buffer. We probably should
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * implement this in Pattern class.
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destination target array
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset destination offset to add value
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destinationlength target array size
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param value to be added
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param increments incremental size expected
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return new destination array, destination if there was no new allocation
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long[] addToLongArray(long[] destination, int offset, int destinationlength,
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long value, int increments) {
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int newlength = destinationlength;
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset + 1 == newlength) {
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newlength += increments;
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long temp[] = new long[newlength];
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.arraycopy(destination, 0, temp, 0, offset);
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            destination = temp;
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        destination[offset] = value;
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return destination;
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializing the ce table for a pattern.
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Stores non-ignorable collation keys.
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Table size will be estimated by the size of the pattern text. Table
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * expansion will be perform as we go along. Adding 1 to ensure that the table
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * size definitely increases.
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return total number of expansions
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: We probably do not need Pattern CE table.
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int initializePatternCETable() {
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int[] cetable = new int[INITIAL_ARRAY_SIZE_];
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int cetablesize = cetable.length;
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patternlength = pattern_.text_.length();
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationElementIterator coleiter = utilIter_;
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (coleiter == null) {
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coleiter = new CollationElementIterator(pattern_.text_, collator_);
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            utilIter_ = coleiter;
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coleiter.setText(pattern_.text_);
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int offset = 0;
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = 0;
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ce;
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while ((ce = coleiter.next()) != CollationElementIterator.NULLORDER) {
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int newce = getCE(ce);
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (newce != CollationElementIterator.IGNORABLE /* 0 */) {
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int[] temp = addToIntArray(cetable, offset, cetablesize, newce,
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        patternlength - coleiter.getOffset() + 1);
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offset++;
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cetable = temp;
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result += (coleiter.getMaxExpansion(ce) - 1);
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        cetable[offset] = 0;
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.CE_ = cetable;
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.CELength_ = offset;
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializing the pce table for a pattern.
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Stores non-ignorable collation keys.
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Table size will be estimated by the size of the pattern text. Table
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * expansion will be perform as we go along. Adding 1 to ensure that the table
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * size definitely increases.
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return total number of expansions
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int initializePatternPCETable() {
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long[] pcetable = new long[INITIAL_ARRAY_SIZE_];
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int pcetablesize = pcetable.length;
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patternlength = pattern_.text_.length();
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationElementIterator coleiter = utilIter_;
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (coleiter == null) {
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coleiter = new CollationElementIterator(pattern_.text_, collator_);
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            utilIter_ = coleiter;
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coleiter.setText(pattern_.text_);
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int offset = 0;
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = 0;
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long pce;
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationPCE iter = new CollationPCE(coleiter);
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ** Should processed CEs be signed or unsigned?
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ** (the rest of the code in this file seems to play fast-and-loose with
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ** whether a CE is signed or unsigned. For example, look at routine above this one.)
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while ((pce = iter.nextProcessed(null)) != CollationPCE.PROCESSED_NULLORDER) {
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long[] temp = addToLongArray(pcetable, offset, pcetablesize, pce, patternlength - coleiter.getOffset() + 1);
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset++;
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pcetable = temp;
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pcetable[offset] = 0;
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.PCE_ = pcetable;
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.PCELength_ = offset;
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: This method only triggers initializePatternCETable(), which is probably no
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //      longer needed.
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int initializePattern() {
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Since the strength is primary, accents are ignored in the pattern.
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // *** Boyer-Moore ***
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (strength_ == Collator.PRIMARY) {
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.hasPrefixAccents_ = false;
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.hasSuffixAccents_ = false;
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.hasPrefixAccents_ = (getFCD(pattern_.text_, 0) >>> SECOND_LAST_BYTE_SHIFT_) != 0;
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.hasSuffixAccents_ = (getFCD(pattern_.text_.codePointBefore(pattern_.text_.length())) & LAST_BYTE_MASK_) != 0;
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        */
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.PCE_ = null;
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // since intializePattern is an internal method status is a success.
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return initializePatternCETable();
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // *** Boyer-Moore ***
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     private final void setShiftTable(char shift[],
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         char backshift[],
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         int cetable[], int cesize,
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         int expansionsize,
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         int defaultforward,
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         int defaultbackward) {
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         // No implementation
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     }
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: This method only triggers initializePattern(), which is probably no
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //      longer needed.
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void initialize() {
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* int expandlength = */ initializePattern();
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // *** Boyer-Moore ***
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ > 0) {
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int cesize = pattern_.CELength_;
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int minlength = cesize > expandlength ? cesize - expandlength : 1;
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.defaultShiftSize_ = minlength;
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setShiftTable(pattern_.shift_, pattern_.backShift_, pattern_.CE_, cesize,
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    expandlength, minlength, minlength);
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return pattern_.defaultShiftSize_;
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        */
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void setMatchNotFound() {
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super.setMatchNotFound();
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // SearchIterator#setMatchNotFound() does following:
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //      search_.matchedIndex_ = DONE;
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //      search_.setMatchedLength(0);
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.isForwardSearching_) {
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(search_.text().getEndIndex());
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(0);
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Checks if the offset runs out of the text string range
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param textstart offset of the first character in the range
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param textlimit limit offset of the text string range
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset to test
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if offset is out of bounds, false otherwise
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean isOutOfBounds(int textstart, int textlimit, int offset) {
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return offset < textstart || offset > textlimit;
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Checks for identical match
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param start offset of possible match
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param end offset of possible match
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return TRUE if identical match is found
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean checkIdentical(int start, int end) {
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (strength_ != Collator.IDENTICAL) {
8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: We could use Normalizer::compare() or similar, but for short strings
8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // which may not be in FCD it might be faster to just NFD them.
8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String textstr = getString(targetText, start, end - start);
8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Normalizer.quickCheck(textstr, Normalizer.NFD, 0) == Normalizer.NO) {
8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textstr = Normalizer.decompose(textstr, false);
8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String patternstr = pattern_.text_;
8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Normalizer.quickCheck(patternstr, Normalizer.NFD, 0) == Normalizer.NO) {
8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            patternstr = Normalizer.decompose(patternstr, false);
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return textstr.equals(patternstr);
8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean initTextProcessedIter() {
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (textProcessedIter_ == null) {
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textProcessedIter_ = new CollationPCE(textIter_);
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textProcessedIter_.init(textIter_);
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Find the next break boundary after startIndex. If the UStringSearch object
8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * has an external break iterator, use that. Otherwise use the internal character
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break iterator.
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int nextBoundaryAfter(int startIndex) {
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIterator breakiterator = search_.breakIter();
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (breakiterator == null) {
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            breakiterator = search_.internalBreakIter_;
9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (breakiterator != null) {
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return breakiterator.following(startIndex);
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return startIndex;
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns TRUE if index is on a break boundary. If the UStringSearch
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * has an external break iterator, test using that, otherwise test
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * using the internal character break iterator.
9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isBreakBoundary(int index) {
9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIterator breakiterator = search_.breakIter();
9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (breakiterator == null) {
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            breakiterator = search_.internalBreakIter_;
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (breakiterator != null && breakiterator.isBoundary(index));
9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note: Followings are corresponding to UCompareCEsResult enum
9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CE_MATCH = -1;
9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CE_NO_MATCH = 0;
9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CE_SKIP_TARG = 1;
9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CE_SKIP_PATN = 2;
9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int CE_LEVEL2_BASE = 0x00000005;
9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int CE_LEVEL3_BASE = 0x00050000;
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int compareCE64s(long targCE, long patCE, ElementComparisonType compareType) {
9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (targCE == patCE) {
9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return CE_MATCH;
9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (compareType == ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return CE_NO_MATCH;
9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long targCEshifted = targCE >>> 32;
9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long patCEshifted = patCE >>> 32;
9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long mask;
9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mask = 0xFFFF0000L;
9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targLev1 = (int)(targCEshifted & mask);
9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patLev1 = (int)(patCEshifted & mask);
9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (targLev1 != patLev1) {
9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (targLev1 == 0) {
9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CE_SKIP_TARG;
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (patLev1 == 0
9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    && compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD) {
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CE_SKIP_PATN;
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return CE_NO_MATCH;
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mask = 0x0000FFFFL;
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targLev2 = (int)(targCEshifted & mask);
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patLev2 = (int)(patCEshifted & mask);
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (targLev2 != patLev2) {
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (targLev2 == 0) {
9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CE_SKIP_TARG;
9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (patLev2 == 0
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    && compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD) {
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CE_SKIP_PATN;
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (patLev2 == CE_LEVEL2_BASE ||
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD &&
9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targLev2 == CE_LEVEL2_BASE)) ? CE_MATCH : CE_NO_MATCH;
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mask = 0xFFFF0000L;
9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targLev3 = (int)(targCE & mask);
9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patLev3 = (int)(patCE & mask);
9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (targLev3 != patLev3) {
9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (patLev3 == CE_LEVEL3_BASE ||
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD &&
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targLev3 == CE_LEVEL3_BASE) )? CE_MATCH: CE_NO_MATCH;
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return CE_MATCH;
9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * An object used for receiving matched index in search() and
9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * searchBackwards().
9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class Match {
9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int start_ = -1;
10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limit_ = -1;
10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Looks for a match of the pattern in the target text, with the text
     * collation element iterator positioned at <code>startIdx</code>.
     * <p>
     * The work is done in two stages: first a run of target collation elements (CEs)
     * matching the pattern's processed CEs is located, then the candidate is
     * validated and converted to string indices (break boundaries, expansions,
     * optional mid-cluster matches, and the identical-level check).
     *
     * @param startIdx text index at which the search begins; must lie between
     *                 <code>search_.beginIndex()</code> and <code>search_.endIndex()</code>
     *                 inclusive
     * @param m        receives the match bounds; may be null. When non-null,
     *                 <code>start_</code> and <code>limit_</code> are set to the match
     *                 bounds, or both to -1 when no match is found
     * @return true if a match was found, false otherwise
     * @throws IllegalArgumentException if the pattern has no collation elements or
     *                 <code>startIdx</code> is out of range
     * @throws ICUException if the CE buffer returns null for the first CE of a
     *                 candidate position
     */
    private boolean search(int startIdx, Match m) {
        // Input parameter sanity check.
        // Note that an empty pattern (CELength_ == 0) is reported with the same
        // "expected position" message as an out-of-range start index.
        if (pattern_.CELength_ == 0
                || startIdx < search_.beginIndex()
                || startIdx > search_.endIndex()) {
            throw new IllegalArgumentException("search(" + startIdx + ", m) - expected position to be between " +
                    search_.beginIndex() + " and " + search_.endIndex());
        }

        // The processed-CE table for the pattern is built lazily, on first use.
        if (pattern_.PCE_ == null) {
            initializePatternPCETable();
        }

        textIter_.setOffset(startIdx);
        CEBuffer ceb = new CEBuffer(this);

        int targetIx = 0;
        CEI targetCEI = null;
        int patIx;
        boolean found;

        // Candidate match bounds in string index space; -1 means "no match".
        int mStart = -1;
        int mLimit = -1;
        // minLimit: low index of the last matched CE.
        // maxLimit: low index of the first CE following the match.
        int minLimit;
        int maxLimit;

        // Outer loop moves over match starting positions in the
        //      target CE space.
        // Here we see the target as a sequence of collation elements, resulting from the following:
        // 1. Target characters were decomposed, and (if appropriate) other compressions and expansions are applied
        //    (for example, digraphs such as IJ may be broken into two characters).
        // 2. An int64_t CE weight is determined for each resulting unit (high 16 bits are primary strength, next
        //    16 bits are secondary, next 16 (the high 16 bits of the low 32-bit half) are tertiary). Any of these
        //    fields that are for strengths below that of the collator are set to 0. If this makes the int64_t
        //    CE weight 0 (as for a combining diacritic with secondary weight when the collator strength is primary),
        //    then the CE is deleted, so the following code sees only CEs that are relevant.
        // For each CE, the lowIndex and highIndex correspond to where this CE begins and ends in the original text.
        // If lowIndex==highIndex, either the CE resulted from an expansion/decomposition of one of the original text
        // characters, or the CE marks the limit of the target text (in which case the CE weight is UCOL_PROCESSED_NULLORDER).
        for (targetIx = 0; ; targetIx++) {
            found = true;
            // Inner loop checks for a match beginning at each
            // position from the outer loop.
            int targetIxOffset = 0;
            long patCE = 0;
            // For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer
            // (compared to the last CE fetched for the previous targetIx value) as we need to go
            // for this targetIx value, so if it is non-NULL then other ceb.get calls should be OK.
            CEI firstCEI = ceb.get(targetIx);
            if (firstCEI == null) {
                throw new ICUException("CEBuffer.get(" + targetIx + ") returned null.");
            }

            for (patIx = 0; patIx < pattern_.PCELength_; patIx++) {
                patCE = pattern_.PCE_[patIx];
                // The target CE index is targetIx + patIx + targetIxOffset; the skip
                // cases below adjust patIx/targetIxOffset to hold one side in place.
                targetCEI = ceb.get(targetIx + patIx + targetIxOffset);
                // Compare CE from target string with CE from the pattern.
                // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
                // which will fail the compare, below.
                int ceMatch = compareCE64s(targetCEI.ce_, patCE, search_.elementComparisonType_);
                if (ceMatch == CE_NO_MATCH) {
                    found = false;
                    break;
                } else if (ceMatch > CE_NO_MATCH) {
                    if (ceMatch == CE_SKIP_TARG) {
                        // redo with same patCE, next targCE
                        // (patIx-- cancels the loop's patIx++; the offset moves the target on by one)
                        patIx--;
                        targetIxOffset++;
                    } else { // ceMatch == CE_SKIP_PATN
                        // redo with same targCE, next patCE
                        // (the offset decrement cancels the loop's patIx++ in the target index)
                        targetIxOffset--;
                    }
                }
            }
            targetIxOffset += pattern_.PCELength_; // this is now the offset in target CE space to end of the match so far

            if (!found && ((targetCEI == null) || (targetCEI.ce_ != CollationPCE.PROCESSED_NULLORDER))) {
                // No match at this targetIx.  Try again at the next.
                continue;
            }

            if (!found) {
                // No match at all, we have run off the end of the target text.
                break;
            }

            // We have found a match in CE space.
            // Now determine the bounds in string index space.
            // There still is a chance of match failure if the CE range does not correspond to
            // an acceptable character range.
            //
            CEI lastCEI = ceb.get(targetIx + targetIxOffset -1);

            mStart = firstCEI.lowIndex_;
            minLimit = lastCEI.lowIndex_;

            // Look at the CE following the match.  If it is UCOL_NULLORDER the match
            // extended to the end of input, and the match is good.

            // Look at the high and low indices of the CE following the match. If
            // they are the same it means one of two things:
            //    1. The match extended to the last CE from the target text, which is OK, or
            //    2. The last CE that was part of the match is in an expansion that extends
            //       to the first CE after the match. In this case, we reject the match.
            CEI nextCEI = null;
            if (search_.elementComparisonType_ == ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
                nextCEI = ceb.get(targetIx + targetIxOffset);
                maxLimit = nextCEI.lowIndex_;
                if (nextCEI.lowIndex_ == nextCEI.highIndex_ && nextCEI.ce_ != CollationPCE.PROCESSED_NULLORDER) {
                    found = false;
                }
            } else {
                // Non-standard comparison: walk forward over the CEs that follow the match.
                for (;; ++targetIxOffset) {
                    nextCEI = ceb.get(targetIx + targetIxOffset);
                    maxLimit = nextCEI.lowIndex_;
                    // If we are at the end of the target too, match succeeds
                    if (nextCEI.ce_ == CollationPCE.PROCESSED_NULLORDER) {
                        break;
                    }
                    // As long as the next CE has primary weight of 0,
                    // it is part of the last target element matched by the pattern;
                    // make sure it can be part of a match with the last patCE
                    if ((((nextCEI.ce_) >>> 32) & 0xFFFF0000L) == 0) {
                        int ceMatch = compareCE64s(nextCEI.ce_, patCE, search_.elementComparisonType_);
                        if (ceMatch == CE_NO_MATCH || ceMatch == CE_SKIP_PATN ) {
                            found = false;
                            break;
                        }
                    // If lowIndex == highIndex, this target CE is part of an expansion of the last matched
                    // target element, but it has non-zero primary weight => match fails
                    } else if ( nextCEI.lowIndex_ == nextCEI.highIndex_ ) {
                        found = false;
                        break;
                    // Else the target CE is not part of an expansion of the last matched element, match succeeds
                    } else {
                        break;
                    }
                }
            }

            // Check for the start of the match being within a combining sequence.
            // This can happen if the pattern itself begins with a combining char, and
            // the match found combining marks in the target text that were attached
            // to something else.
            // This type of match should be rejected for not completely consuming a
            // combining sequence.
            if (!isBreakBoundary(mStart)) {
                found = false;
            }

            // Check for the start of the match being within a Collation Element Expansion,
            // meaning that the first char of the match is only partially matched.
            // With expansions, the first CE will report the index of the source
            // character, and all subsequent (expansions) CEs will report the source index of the
            // _following_ character.
            int secondIx = firstCEI.highIndex_;
            if (mStart == secondIx) {
                found = false;
            }

            // Allow matches to end in the middle of a grapheme cluster if the following
            // conditions are met; this is needed to make prefix search work properly in
            // Indic, see #11750
            // * the default breakIter is being used
            // * the next collation element after this combining sequence
            //   - has non-zero primary weight
            //   - corresponds to a separate character following the one at end of the current match
            //   (the second of these conditions, and perhaps both, may be redundant given the
            //   subsequent check for normalization boundary; however they are likely much faster
            //   tests in any case)
            // * the match limit is a normalization boundary
            boolean allowMidclusterMatch =
                            breakIterator == null &&
                            nextCEI != null && (((nextCEI.ce_) >>> 32) & 0xFFFF0000L) != 0 &&
                            maxLimit >= lastCEI.highIndex_ && nextCEI.highIndex_ > maxLimit &&
                            (nfd_.hasBoundaryBefore(codePointAt(targetText, maxLimit)) ||
                                    nfd_.hasBoundaryAfter(codePointBefore(targetText, maxLimit)));

            // If those conditions are met, then:
            // * do NOT advance the candidate match limit (mLimit) to a break boundary; however
            //   the match limit may be backed off to a previous break boundary. This handles
            //   cases in which mLimit includes target characters that are ignorable with current
            //   settings (such as space) and which extend beyond the pattern match.
            // * do NOT require that end of the combining sequence not extend beyond the match in CE space
            // * do NOT require that match limit be on a breakIter boundary

            // Advance the match end position to the first acceptable match boundary.
            // This advances the index over any combining characters.
            mLimit = maxLimit;
            if (minLimit < maxLimit) {
                // When the last CE's low index is the same as its high index, the CE is likely
                // part of an expansion. In this case, the index is located just after the
                // character corresponding to the CEs compared above. If the index is right
                // at a break boundary, moving the position to the next boundary would give an
                // incorrect match length when ignorable characters exist between
                // the position and the next character that produces CE(s). See ticket#8482.
                if (minLimit == lastCEI.highIndex_ && isBreakBoundary(minLimit)) {
                    mLimit = minLimit;
                } else {
                    int nba = nextBoundaryAfter(minLimit);
                    // Note that we can have nba < maxLimit && nba >= minLimit, in which
                    // case we want to set mLimit to nba regardless of allowMidclusterMatch
                    // (i.e. we back off mLimit to the previous breakIterator boundary).
                    if (nba >= lastCEI.highIndex_ && (!allowMidclusterMatch || nba < maxLimit)) {
                        mLimit = nba;
                    }
                }
            }

            if (!allowMidclusterMatch) {
                // If advancing to the end of a combining sequence in character indexing space
                // advanced us beyond the end of the match in CE space, reject this match.
                if (mLimit > maxLimit) {
                    found = false;
                }

                // The match limit itself must also fall on a break boundary.
                if (!isBreakBoundary(mLimit)) {
                    found = false;
                }
            }

            // Final validation of the candidate range [mStart, mLimit) by checkIdentical().
            if (!checkIdentical(mStart, mLimit)) {
                found = false;
            }

            if (found) {
                break;
            }
            // Otherwise the candidate was rejected; fall through to the next targetIx.
        }

        // All Done.  Store back the match bounds to the caller.
        //
        if (found == false) {
            mLimit = -1;
            mStart = -1;
        }

        if (m != null) {
            m.start_ = mStart;
            m.limit_ = mLimit;
        }

        return found;
    }
12477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1248bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private static int codePointAt(CharacterIterator iter, int index) {
1249bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int currentIterIndex = iter.getIndex();
1250bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        char codeUnit = iter.setIndex(index);
1251bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int cp = codeUnit;
1252bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (Character.isHighSurrogate(codeUnit)) {
1253bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            char nextUnit = iter.next();
1254bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (Character.isLowSurrogate(nextUnit)) {
1255bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                cp = Character.toCodePoint(codeUnit, nextUnit);
1256bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
1257bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
1258bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        iter.setIndex(currentIterIndex);  // restore iter position
1259bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return cp;
1260bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
1261bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1262bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private static int codePointBefore(CharacterIterator iter, int index) {
1263bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int currentIterIndex = iter.getIndex();
1264bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        iter.setIndex(index);
1265bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        char codeUnit = iter.previous();
1266bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int cp = codeUnit;
1267bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (Character.isLowSurrogate(codeUnit)) {
1268bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            char prevUnit = iter.previous();
1269bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (Character.isHighSurrogate(prevUnit)) {
1270bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                cp = Character.toCodePoint(prevUnit, codeUnit);
1271bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
1272bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
1273bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        iter.setIndex(currentIterIndex);  // restore iter position
1274bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return cp;
1275bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
1276bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
12777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean searchBackwards(int startIdx, Match m) {
12787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //ICU4C_TODO comment:  reject search patterns beginning with a combining char.
12797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Input parameter sanity check.
12817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ == 0
12827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || startIdx < search_.beginIndex()
12837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || startIdx > search_.endIndex()) {
12847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("searchBackwards(" + startIdx + ", m) - expected position to be between " +
12857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    search_.beginIndex() + " and " + search_.endIndex());
12867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.PCE_ == null) {
12897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            initializePatternPCETable();
12907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEBuffer ceb = new CEBuffer(this);
12937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targetIx = 0;
12947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
12967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Pre-load the buffer with the CE's for the grapheme
12977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * after our starting position so that we're sure that
12987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * we can look at the CE following the match when we
12997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * check the match boundaries.
13007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
13017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * This will also pre-fetch the first CE that we'll
13027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * consider for the match.
13037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
13047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (startIdx < search_.endIndex()) {
13057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BreakIterator bi = search_.internalBreakIter_;
13067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int next = bi.following(startIdx);
13077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(next);
13097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (targetIx = 0; ; targetIx++) {
13117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ceb.getPrevious(targetIx).lowIndex_ < startIdx) {
13127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
13137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
13167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(startIdx);
13177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI targetCEI = null;
13207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patIx;
13217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean found;
13227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limitIx = targetIx;
13247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int mStart = -1;
13257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int mLimit = -1;
13267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int minLimit;
13277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int maxLimit;
13287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Outer loop moves over match starting positions in the
13307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //      target CE space.
13317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Here, targetIx values increase toward the beginning of the base text (i.e. we get the text CEs in reverse order).
13327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // But  patIx is 0 at the beginning of the pattern and increases toward the end.
13337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // So this loop performs a comparison starting with the end of pattern, and proceeds toward the beginning of the pattern
13347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and the beginning of the base text.
13357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (targetIx = limitIx; ; targetIx++) {
13367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            found = true;
13377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // For targetIx > limitIx, this ceb.getPrevious gets a CE that is as far back in the ring buffer
13387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // (compared to the last CE fetched for the previous targetIx value) as we need to go
13397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for this targetIx value, so if it is non-NULL then other ceb.getPrevious calls should be OK.
13407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CEI lastCEI = ceb.getPrevious(targetIx);
13417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (lastCEI == null) {
13427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new ICUException("CEBuffer.getPrevious(" + targetIx + ") returned null.");
13437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Inner loop checks for a match beginning at each
13457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // position from the outer loop.
13467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int targetIxOffset = 0;
13477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (patIx = pattern_.PCELength_ - 1; patIx >= 0; patIx--) {
13487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long patCE = pattern_.PCE_[patIx];
13497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                targetCEI = ceb.getPrevious(targetIx + pattern_.PCELength_ - 1 - patIx + targetIxOffset);
13517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Compare CE from target string with CE from the pattern.
13527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
13537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // which will fail the compare, below.
13547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int ceMatch = compareCE64s(targetCEI.ce_, patCE, search_.elementComparisonType_);
13557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ceMatch == CE_NO_MATCH) {
13567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    found = false;
13577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
13587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if (ceMatch > CE_NO_MATCH) {
13597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (ceMatch == CE_SKIP_TARG) {
13607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // redo with same patCE, next targCE
13617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        patIx++;
13627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targetIxOffset++;
13637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else { // ceMatch == CE_SKIP_PATN
13647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // redo with same targCE, next patCE
13657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targetIxOffset--;
13667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
13677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!found && ((targetCEI == null) || (targetCEI.ce_ != CollationPCE.PROCESSED_NULLORDER))) {
13717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No match at this targetIx.  Try again at the next.
13727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
13737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!found) {
13767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No match at all, we have run off the end of the target text.
13777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
13787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We have found a match in CE space.
13817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Now determine the bounds in string index space.
13827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // There still is a chance of match failure if the CE range does not correspond to
13837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // an acceptable character range.
13847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //
13857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CEI firstCEI = ceb.getPrevious(targetIx + pattern_.PCELength_ - 1 + targetIxOffset);
13867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mStart = firstCEI.lowIndex_;
13877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Check for the start of the match being within a combining sequence.
13897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This can happen if the pattern itself begins with a combining char, and
13907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the match found combining marks in the target text that were attached
13917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // to something else.
13927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This type of match should be rejected for not completely consuming a
13937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // combining sequence.
13947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!isBreakBoundary(mStart)) {
13957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
13967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Look at the high index of the first CE in the match. If it's the same as the
13997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // low index, the first CE in the match is in the middle of an expansion.
14007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (mStart == firstCEI.highIndex_) {
14017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
14027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minLimit = lastCEI.lowIndex_;
14057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (targetIx > 0) {
14077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Look at the CE following the match.  If it is UCOL_NULLORDER the match
14087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // extended to the end of input, and the match is good.
14097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Look at the high and low indices of the CE following the match. If
14117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // they are the same it means one of two things:
14127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    1. The match extended to the last CE from the target text, which is OK, or
14137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    2. The last CE that was part of the match is in an expansion that extends
14147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //       to the first CE after the match. In this case, we reject the match.
14157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                CEI nextCEI  = ceb.getPrevious(targetIx - 1);
14167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (nextCEI.lowIndex_ == nextCEI.highIndex_ && nextCEI.ce_ != CollationPCE.PROCESSED_NULLORDER) {
14187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    found = false;
14197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                mLimit = maxLimit = nextCEI.lowIndex_;
14227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1423bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // Allow matches to end in the middle of a grapheme cluster if the following
1424bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // conditions are met; this is needed to make prefix search work properly in
1425bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // Indic, see #11750
1426bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * the default breakIter is being used
1427bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * the next collation element after this combining sequence
1428bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   - has non-zero primary weight
1429bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   - corresponds to a separate character following the one at end of the current match
1430bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   (the second of these conditions, and perhaps both, may be redundant given the
1431bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   subsequent check for normalization boundary; however they are likely much faster
1432bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   tests in any case)
1433bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * the match limit is a normalization boundary
1434bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                boolean allowMidclusterMatch =
1435bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                breakIterator == null &&
1436bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                nextCEI != null && (((nextCEI.ce_) >>> 32) & 0xFFFF0000L) != 0 &&
1437bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                maxLimit >= lastCEI.highIndex_ && nextCEI.highIndex_ > maxLimit &&
1438bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                (nfd_.hasBoundaryBefore(codePointAt(targetText, maxLimit)) ||
1439bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                        nfd_.hasBoundaryAfter(codePointBefore(targetText, maxLimit)));
1440bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1441bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // If those conditions are met, then:
1442bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * do NOT advance the candidate match limit (mLimit) to a break boundary; however
1443bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   the match limit may be backed off to a previous break boundary. This handles
1444bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   cases in which mLimit includes target characters that are ignorable with current
1445bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   settings (such as space) and which extend beyond the pattern match.
1446bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * do NOT require that end of the combining sequence not extend beyond the match in CE space
1447bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * do NOT require that match limit be on a breakIter boundary
1448bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
14497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Advance the match end position to the first acceptable match boundary.
14507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // This advances the index over any combining characters.
14517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (minLimit < maxLimit) {
14527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int nba = nextBoundaryAfter(minLimit);
1453bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // Note that we can have nba < maxLimit && nba >= minLimit, in which
1454bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // case we want to set mLimit to nba regardless of allowMidclusterMatch
1455bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // (i.e. we back off mLimit to the previous breakIterator boundary).
1456bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (nba >= lastCEI.highIndex_ && (!allowMidclusterMatch || nba < maxLimit)) {
14577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        mLimit = nba;
14587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
14597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1461bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (!allowMidclusterMatch) {
1462bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // If advancing to the end of a combining sequence in character indexing space
1463bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // advanced us beyond the end of the match in CE space, reject this match.
1464bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (mLimit > maxLimit) {
1465bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        found = false;
1466bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    }
14677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1468bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // Make sure the end of the match is on a break boundary
1469bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (!isBreakBoundary(mLimit)) {
1470bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        found = false;
1471bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    }
14727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
14757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No non-ignorable CEs after this point.
14767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The maximum position is detected by boundary after
14777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the last non-ignorable CE. Combining sequence
14787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // across the start index will be truncated.
14797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int nba = nextBoundaryAfter(minLimit);
14807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx;
14817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!checkIdentical(mStart, mLimit)) {
14847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
14857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (found) {
14887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
14897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // All Done.  Store back the match bounds to the caller.
14937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
14947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (found == false) {
14957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mLimit = -1;
14967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mStart = -1;
14977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (m != null) {
15007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            m.start_ = mStart;
15017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            m.limit_ = mLimit;
15027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return found;
15057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note:
15087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
15097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ICU4C usearch_handleNextExact() is identical to usearch_handleNextCanonical()
15107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // for the linear search implementation. The differences are addressed in search().
15117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
15127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handleNextExact() {
15137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handleNextCommonImpl();
15147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handleNextCanonical() {
15177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handleNextCommonImpl();
15187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handleNextCommonImpl() {
15217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int textOffset = textIter_.getOffset();
15227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Match match = new Match();
15237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search(textOffset, match)) {
15257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.matchedIndex_ = match.start_;
15267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.setMatchedLength(match.limit_ - match.start_);
15277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
15287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
15297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setMatchNotFound();
15307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
15317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note:
15357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
15367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ICU4C usearch_handlePreviousExact() is identical to usearch_handlePreviousCanonical()
15377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // for the linear search implementation. The differences are addressed in searchBackwards().
15387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
15397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handlePreviousExact() {
15407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handlePreviousCommonImpl();
15417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handlePreviousCanonical() {
15447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handlePreviousCommonImpl();
15457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handlePreviousCommonImpl() {
15487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int textOffset;
15497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.isOverlap_) {
15517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ != DONE) {
15527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textOffset = search_.matchedIndex_ + search_.matchedLength() - 1;
15537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
15547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // move the start position at the end of possible match
15557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                initializePatternPCETable();
15567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (!initTextProcessedIter()) {
15577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setMatchNotFound();
15587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
15597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int nPCEs = 0; nPCEs < pattern_.PCELength_ - 1; nPCEs++) {
15617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    long pce = textProcessedIter_.nextProcessed(null);
15627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (pce == CollationPCE.PROCESSED_NULLORDER) {
15637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // at the end of the text
15647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
15657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
15667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textOffset = textIter_.getOffset();
15687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
15697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
15707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textOffset = textIter_.getOffset();
15717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Match match = new Match();
15747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (searchBackwards(textOffset, match)) {
15757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.matchedIndex_ = match.start_;
15767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.setMatchedLength(match.limit_ - match.start_);
15777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
15787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
15797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setMatchNotFound();
15807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
15817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets a substring out of a CharacterIterator
15867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java porting note: Not available in ICU4C
15887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param text CharacterIterator
15907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param start start offset
15917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param length of substring
15927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return substring from text starting at start and length length
15937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final String getString(CharacterIterator text, int start, int length) {
15957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder result = new StringBuilder(length);
15967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int offset = text.getIndex();
15977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setIndex(start);
15987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < length; i++) {
15997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.append(text.current());
16007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.next();
16017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setIndex(offset);
16037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result.toString();
16047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java port of ICU4C struct UPattern (usrchimp.h)
16087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class Pattern {
16107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Pattern string */
16117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String text_;
16127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long[] PCE_;
16147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int PCELength_ = 0;
16157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: We probably do not need CE_ / CELength_
16177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        @SuppressWarnings("unused")
16187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int[] CE_;
16197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int CELength_ = 0;
16207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // *** Boyer-Moore ***
16227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // boolean hasPrefixAccents_ = false;
16237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // boolean hasSuffixAccents_ = false;
16247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // int defaultShiftSize_;
16257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // char[] shift_;
16267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // char[] backShift_;
16277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Pattern(String pattern) {
16297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text_ = pattern;
16307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java port of ICU4C UCollationPCE (usrchimp.h)
16357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class CollationPCE {
16377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final long PROCESSED_NULLORDER = -1;
16387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int DEFAULT_BUFFER_SIZE = 16;
16407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int BUFFER_GROW = 8;
16417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: PRIMARYORDERMASK is also duplicated in StringSearch class
16437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int PRIMARYORDERMASK = 0xffff0000;
16447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int CONTINUATION_MARKER = 0xc0;
16457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private PCEBuffer pceBuffer_ = new PCEBuffer();
16477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private CollationElementIterator cei_;
16487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int strength_;
16497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private boolean toShift_;
16507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private boolean isShifted_;
16517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int variableTop_;
16527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CollationPCE(CollationElementIterator iter) {
16547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            init(iter);
16557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void init(CollationElementIterator iter) {
16587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cei_ = iter;
16597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            init(iter.getRuleBasedCollator());
16607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private void init(RuleBasedCollator coll) {
16637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength_ = coll.getStrength();
16647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            toShift_ = coll.isAlternateHandlingShifted();
16657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isShifted_ = false;
16667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            variableTop_ = coll.getVariableTop();
16677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        @SuppressWarnings("fallthrough")
16707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private long processCE(int ce) {
16717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
16727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This is clean, but somewhat slow...
16747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We could apply the mask to ce and then
16757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // just get all three orders...
16767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            switch (strength_) {
16777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            default:
16787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tertiary = CollationElementIterator.tertiaryOrder(ce);
16797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* note fall-through */
16807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collator.SECONDARY:
16827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                secondary = CollationElementIterator.secondaryOrder(ce);
16837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* note fall-through */
16847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collator.PRIMARY:
16867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                primary = CollationElementIterator.primaryOrder(ce);
16877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
16887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** This should probably handle continuations too. ****
16907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** That means that we need 24 bits for the primary ****
16917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** instead of the 16 that we're currently using. ****
16927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** So we can lay out the 64 bits as: 24.12.12.16. ****
16937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** Another complication with continuations is that ****
16947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** the *second* CE is marked as a continuation, so ****
16957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** we always have to peek ahead to know how long ****
16967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** the primary is... ****
16977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if ((toShift_ && variableTop_ > ce && primary != 0) || (isShifted_ && primary == 0)) {
16987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (primary == 0) {
17007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return CollationElementIterator.IGNORABLE;
17017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (strength_ >= Collator.QUATERNARY) {
17047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    quaternary = primary;
17057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                primary = secondary = tertiary = 0;
17087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                isShifted_ = true;
17097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
17107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (strength_ >= Collator.QUATERNARY) {
17117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    quaternary = 0xFFFF;
17127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                isShifted_ = false;
17157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
17167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
17187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
17217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Get the processed ordering priority of the next collation element in the text.
17227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * A single character may contain more than one collation element.
17237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
17247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Note: This is equivalent to
17257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * UCollationPCE::nextProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
17267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
17277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param range receiving the iterator index before/after fetching the CE.
17287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @return The next collation elements ordering, otherwise returns PROCESSED_NULLORDER
17297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *         if an error has occurred or if the end of string has been reached
17307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
17317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public long nextProcessed(Range range) {
17327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long result = CollationElementIterator.IGNORABLE;
17337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int low = 0, high = 0;
17347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pceBuffer_.reset();
17367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            do {
17387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                low = cei_.getOffset();
17397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int ce = cei_.next();
17407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                high = cei_.getOffset();
17417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ce == CollationElementIterator.NULLORDER) {
17437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     result = PROCESSED_NULLORDER;
17447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     break;
17457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = processCE(ce);
17487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } while (result == CollationElementIterator.IGNORABLE);
17497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (range != null) {
17517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                range.ixLow_ = low;
17527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                range.ixHigh_ = high;
17537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
17547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
17567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
17597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Get the processed ordering priority of the previous collation element in the text.
17607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * A single character may contain more than one collation element.
17617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
17627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Note: This is equivalent to
17637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * UCollationPCE::previousProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
17647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
17657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param range receiving the iterator index before/after fetching the CE.
17667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @return The previous collation elements ordering, otherwise returns
17677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *         PROCESSED_NULLORDER if an error has occurred or if the start of
17687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *         string has been reached.
17697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
17707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public long previousProcessed(Range range) {
17717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long result = CollationElementIterator.IGNORABLE;
17727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int low = 0, high = 0;
17737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // pceBuffer_.reset();
17757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (pceBuffer_.empty()) {
17777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // buffer raw CEs up to non-ignorable primary
17787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                RCEBuffer rceb = new RCEBuffer();
17797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int ce;
17807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean finish = false;
17827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // **** do we need to reset rceb, or will it always be empty at this point ****
17847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
17857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    high = cei_.getOffset();
17867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ce = cei_.previous();
17877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    low = cei_.getOffset();
17887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (ce == CollationElementIterator.NULLORDER) {
17907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (!rceb.empty()) {
17917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            break;
17927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
17937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        finish = true;
17957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
17967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
17977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    rceb.put(ce, low, high);
17997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } while ((ce & PRIMARYORDERMASK) == 0 || isContinuation(ce));
18007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (finish) {
18027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
18037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // process the raw CEs
18067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (!rceb.empty()) {
18077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    RCEI rcei = rceb.get();
18087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    result = processCE(rcei.ce_);
18107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (result != CollationElementIterator.IGNORABLE) {
18127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        pceBuffer_.put(result, rcei.low_, rcei.high_);
18137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
18147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (pceBuffer_.empty()) {
18187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // **** Is -1 the right value for ixLow, ixHigh? ****
18197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (range != null) {
18207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    range.ixLow_ = -1;
18217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    range.ixHigh_ = -1;
18227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CollationElementIterator.NULLORDER;
18247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            PCEI pcei = pceBuffer_.get();
18277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (range != null) {
18297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                range.ixLow_ = pcei.low_;
18307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                range.ixHigh_ = pcei.high_;
18317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return pcei.ce_;
18347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static boolean isContinuation(int ce) {
18377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return ((ce & CONTINUATION_MARKER) == CONTINUATION_MARKER);
18387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final class Range {
18417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int ixLow_;
18427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int ixHigh_;
18437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Processed collation element buffer stuff ported from ICU4C ucoleitr.cpp */
18467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final class PCEI {
18477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long ce_;
18487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int low_;
18497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int high_;
18507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final class PCEBuffer {
18537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private PCEI[] buffer_ = new PCEI[DEFAULT_BUFFER_SIZE];
18547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private int bufferIndex_ = 0;
18557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            void reset() {
18577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bufferIndex_ = 0;
18587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean empty() {
18617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return bufferIndex_ <= 0;
18627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            void put(long ce, int ixLow, int ixHigh)
18657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            {
18667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (bufferIndex_ >= buffer_.length) {
18677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    PCEI[] newBuffer = new PCEI[buffer_.length + BUFFER_GROW];
18687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.arraycopy(buffer_, 0, newBuffer, 0, buffer_.length);
18697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer_ = newBuffer;
18707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_] = new PCEI();
18727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].ce_ = ce;
18737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].low_ = ixLow;
18747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].high_ = ixHigh;
18757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bufferIndex_ += 1;
18777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            PCEI get() {
18807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (bufferIndex_ > 0) {
18817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return buffer_[--bufferIndex_];
18827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
18847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Raw collation element buffer stuff ported from ICU4C ucoleitr.cpp */
18887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final class RCEI {
18897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int ce_;
18907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int low_;
18917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int high_;
18927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final class RCEBuffer {
18957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private RCEI[] buffer_ = new RCEI[DEFAULT_BUFFER_SIZE];
18967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private int bufferIndex_ = 0;
18977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean empty() {
18997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return bufferIndex_ <= 0;
19007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            void put(int ce, int ixLow, int ixHigh) {
19037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (bufferIndex_ >= buffer_.length) {
19047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    RCEI[] newBuffer = new RCEI[buffer_.length + BUFFER_GROW];
19057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.arraycopy(buffer_, 0, newBuffer, 0, buffer_.length);
19067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer_ = newBuffer;
19077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
19087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_] = new RCEI();
19097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].ce_ = ce;
19107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].low_ = ixLow;
19117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].high_ = ixHigh;
19127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bufferIndex_ += 1;
19147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            RCEI get() {
19177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (bufferIndex_ > 0) {
19187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return buffer_[--bufferIndex_];
19197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
19207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
19217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
19237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
19247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
19267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java port of ICU4C CEI (usearch.cpp)
19277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * CEI  Collation Element + source text index.
19297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *      These structs are kept in the circular buffer.
19307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
19317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class CEI {
19327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long ce_;
19337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lowIndex_;
19347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int highIndex_;
19357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
19367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
19387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * CEBuffer A circular buffer of CEs from the text being searched
19397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
19407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class CEBuffer {
19417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Java porting note: ICU4C uses the size for stack buffer
19427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // static final int DEFAULT_CEBUFFER_SIZE = 96;
19437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static final int CEBUFFER_EXTRA = 32;
19457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static final int MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L = 8;
19467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static final int MAX_TARGET_IGNORABLES_PER_PAT_OTHER = 3;
19477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI[] buf_;
19497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int bufSize_;
19507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int firstIx_;
19517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limitIx_;
19527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Java porting note: No references in ICU4C implementation
19547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // CollationElementIterator ceIter_;
19557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringSearch strSearch_;
19577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEBuffer(StringSearch ss) {
19597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strSearch_ = ss;
19607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            bufSize_ = ss.pattern_.PCELength_ + CEBUFFER_EXTRA;
19617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (ss.search_.elementComparisonType_ != ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
19627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String patText = ss.pattern_.text_;
19637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (patText != null) {
19647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    for (int i = 0; i < patText.length(); i++) {
19657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        char c = patText.charAt(i);
19667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (MIGHT_BE_JAMO_L(c)) {
19677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            bufSize_ += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L;
19687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else {
19697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // No check for surrogates, we might allocate slightly more buffer than necessary.
19707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            bufSize_ += MAX_TARGET_IGNORABLES_PER_PAT_OTHER;
19717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
19727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
19737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
19747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Not used - see above
19777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // ceIter_ = ss.textIter_;
19787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            firstIx_ = 0;
19807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limitIx_ = 0;
19817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!ss.initTextProcessedIter()) {
19837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
19847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_ = new CEI[bufSize_];
19877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
19887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Get the CE with the specified index, fetching any new CE with nextProcessed().
19907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   Index must be in the range
19917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //             n-history_size < index < n+1
19927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   where n is the largest index to have been fetched by some previous call to this function.
19937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   The CE value will be UCOL_PROCESSED_NULLORDER at end of input.
19947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
19957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI get(int index) {
19967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = index % bufSize_;
19977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index >= firstIx_ && index < limitIx_) {
19997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The request was for an entry already in our buffer.
20007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Just return it.
20017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return buf_[i];
20027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Caller is requesting a new, never accessed before, CE.
20057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Verify that it is the next one in sequence, which is all
20067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // that is allowed.
20077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index != limitIx_) {
20087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert(false);
20097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
20107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Manage the circular CE buffer indexing
20137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limitIx_++;
20147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (limitIx_ - firstIx_ >= bufSize_) {
20167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The buffer is full, knock out the lowest-indexed entry.
20177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                firstIx_++;
20187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationPCE.Range range = new CollationPCE.Range();
20217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (buf_[i] == null) {
20227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buf_[i] = new CEI();
20237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].ce_ = strSearch_.textProcessedIter_.nextProcessed(range);
20257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].lowIndex_ = range.ixLow_;
20267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].highIndex_ = range.ixHigh_;
20277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return buf_[i];
20297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Get the CE with the specified index, fetching any new CE with previousProcessed().
20327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   Index must be in the range
20337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //             n-history_size < index < n+1
20347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   where n is the largest index to have been fetched by some previous call to this function.
20357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   The CE value will be UCOL_PROCESSED_NULLORDER once the input is exhausted.
20367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
20377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI getPrevious(int index) {
20387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = index % bufSize_;
20397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index >= firstIx_ && index < limitIx_) {
20417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The request was for an entry already in our buffer.
20427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Just return it.
20437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return buf_[i];
20447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Caller is requesting a new, never accessed before, CE.
20477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Verify that it is the next one in sequence, which is all
20487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // that is allowed.
20497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index != limitIx_) {
20507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert(false);
20517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
20527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Manage the circular CE buffer indexing
20557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limitIx_++;
20567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (limitIx_ - firstIx_ >= bufSize_) {
20587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The buffer is full, knock out the lowest-indexed entry.
20597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                firstIx_++;
20607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationPCE.Range range = new CollationPCE.Range();
20637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (buf_[i] == null) {
20647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buf_[i] = new CEI();
20657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].ce_ = strSearch_.textProcessedIter_.previousProcessed(range);
20677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].lowIndex_ = range.ixLow_;
20687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].highIndex_ = range.ixHigh_;
20697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return buf_[i];
20717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static boolean MIGHT_BE_JAMO_L(char c) {
20747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (c >= 0x1100 && c <= 0x115E)
20757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    || (c >= 0x3131 && c <= 0x314E)
20767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    || (c >= 0x3165 && c <= 0x3186);
20777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
20797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
2080