12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others.
22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
5bee65486a185907111f3be60992433e133ec0e32Scott Russell * Copyright (C) 1996-2016, International Business Machines Corporation and
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.StringCharacterIterator;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Locale;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUException;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// Java porting note:
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        The ICU4C implementation contains dead code in many places.
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      While porting the ICU4C linear search implementation, this dead code
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      was not fully ported. The code blocks tagged by "// *** Boyer-Moore ***"
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      are those dead code blocks, still available in ICU4C.
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        The ICU4C implementation does not seem to handle UCharacterIterator pointing
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      to a fragment of text properly. ICU4J uses CharacterIterator to navigate through
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      the input text. We need to carefully review the code ported from ICU4C
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      assuming the start index is 0.
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        ICU4C implementation initializes pattern.CE and pattern.PCE. It looks like
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      CE is no longer used, except in a few places checking CELength. It looks like this
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      is a leftover from already-disabled Boyer-Moore search code. This Java implementation
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      preserves the code, but we should clean this up later.
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>StringSearch</tt> is a {@link SearchIterator} that provides
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * language-sensitive text searching based on the comparison rules defined
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * in a {@link RuleBasedCollator} object.
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * StringSearch ensures that language eccentricity can be
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * handled, e.g. for the German collator, characters &szlig; and SS will be matched
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * if case is chosen to be ignored.
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * "ICU Collation Design Document"</a> for more information.
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * There are 2 match options for selection:<br>
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Let S' be the sub-string of a text string S between the offsets start and
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * end [start, end].
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <br>
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A pattern string P matches a text string S at the offsets [start, end]
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * if
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * option 1. Some canonical equivalent of P matches some canonical equivalent
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *           of S'
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * option 2. P matches S' and if P starts or ends with a combining mark,
 *           there exists no non-ignorable combining mark before or after S'
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *           in S respectively.
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Option 2. is the default.
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This search has APIs similar to that of other text iteration mechanisms
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * such as the break iterators in {@link BreakIterator}. Using these
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * APIs, it is easy to scan through text looking for all occurrences of
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a given pattern. This search iterator allows changing of direction by
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * calling a {@link #reset} followed by a {@link #next} or {@link #previous}.
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Though a direction change can occur without calling {@link #reset} first,
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * this operation comes with some speed penalty.
 * The matches found when searching forward are the same as those found when
 * searching backwards, only in the reverse order.
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link SearchIterator} provides APIs to specify the starting position
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * within the text string to be searched, e.g. {@link SearchIterator#setIndex setIndex},
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link SearchIterator#preceding preceding} and {@link SearchIterator#following following}.
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Since the starting position will be set as it is specified, please take note that
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * there are some danger points at which the search may render incorrect
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * results:
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <ul>
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li> In the midst of a substring that requires normalization.
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li> If the following match is to be found, the position should not be the
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      second character which requires swapping with the preceding
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      character. Vice versa, if the preceding match is to be found, the
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      position to search from should not be the first character which
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      requires swapping with the next character. E.g certain Thai and
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      Lao characters require swapping.
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li> If a following pattern match is to be found, any position within a
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      contracting sequence except the first will fail. Vice versa if a
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      preceding pattern match is to be found, an invalid starting point
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      would be any character within a contracting sequence except the last.
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </ul>
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A {@link BreakIterator} can be used if only matches at logical breaks are desired.
 * Using a {@link BreakIterator} will only give you results that exactly match the
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * boundaries given by the {@link BreakIterator}. For instance the pattern "e" will
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * not be found in the string "\u00e9" if a character break iterator is used.
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Options are provided to handle overlapping matches.
 * E.g. In English, overlapping matches produce the results 0 and 2
 * for the pattern "abab" in the text "ababab", whereas mutually
 * exclusive matches produce only the result 0.
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Options are also provided to implement "asymmetric search" as described in
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search">
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UTS #10 Unicode Collation Algorithm</a>, specifically the ElementComparisonType
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * values.
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Though collator attributes will be taken into consideration while
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * performing matches, there are no APIs here for setting and getting the
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * attributes. These attributes can be set by getting the collator
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * from {@link #getCollator} and using the APIs in {@link RuleBasedCollator}.
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Lastly to update <tt>StringSearch</tt> to the new collator attributes,
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #reset} has to be called.
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Restriction: <br>
 * Currently there are no composite characters that consist of a
115bee65486a185907111f3be60992433e133ec0e32Scott Russell * character with combining class &gt; 0 before a character with combining
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * class == 0. However, if such a character exists in the future,
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>StringSearch</tt> does not guarantee the results for option 1.
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Consult the {@link SearchIterator} documentation for information on
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and examples of how to use instances of this class to implement text
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * searching.
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Note, <tt>StringSearch</tt> is not to be subclassed.
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </p>
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see SearchIterator
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see RuleBasedCollator
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Laura Werner, synwee
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
// internal notes: no method guarantees the correct status of the
// character iterator. the caller has to maintain the original index position
// if necessary. methods may change the index position as they deem fit
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class StringSearch extends SearchIterator {
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Pattern pattern_;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private RuleBasedCollator collator_;
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    // positions within the collation element iterator are used to determine
    // if we are at the start of the text.
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationElementIterator textIter_;
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationPCE textProcessedIter_;
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // utility collation element, used throughout program for temporary
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // iteration.
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationElementIterator utilIter_;
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private Normalizer2 nfd_;
148bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int strength_;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int ceMask_;
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int variableTop_;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean toShift_;
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // *** Boyer-Moore ***
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private char[] canonicalPrefixAccents_;
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private char[] canonicalSuffixAccents_;
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules defined in
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the argument collator to search for argument pattern in the argument
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * target text. The argument <code>breakiter</code> is used to define logical matches.
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See super class documentation for more details on the use of the target
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * text and {@link BreakIterator}.
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param collator {@link RuleBasedCollator} that defines the language rules
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param breakiter A {@link BreakIterator} that is used to determine the
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                boundaries of a logical match. This argument can be null.
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when argument target is null,
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            or of length 0
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see BreakIterator
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see RuleBasedCollator
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, CharacterIterator target, RuleBasedCollator collator,
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BreakIterator breakiter) {
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // This implementation is ported from ICU4C usearch_open()
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(target, breakiter);
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // string search does not really work when numeric collation is turned on
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (collator.getNumericCollation()) {
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new UnsupportedOperationException("Numeric collation is not supported by StringSearch");
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        collator_ = collator;
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        strength_ = collator.getStrength();
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ceMask_ = getMask(strength_);
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        toShift_ = collator.isAlternateHandlingShifted();
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        variableTop_ = collator.getVariableTop();
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
194bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        nfd_ = Normalizer2.getNFDInstance();
195bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_ = new Pattern(pattern);
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(0);
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.matchedIndex_ = DONE;
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        utilIter_ = null;
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_ = new CollationElementIterator(target, collator);
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textProcessedIter_ = null;
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // This is done by super class constructor
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isOverlap_ = false;
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isCanonicalMatch_ = false;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isForwardSearching_ = true;
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = true;
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ULocale collLocale = collator.getLocale(ULocale.VALID_LOCALE);
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.internalBreakIter_ = BreakIterator.getCharacterInstance(collLocale == null ? ULocale.ROOT : collLocale);
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.internalBreakIter_.setText((CharacterIterator)target.clone());  // We need to create a clone
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        initialize();
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules defined in
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the argument collator to search for argument pattern in the argument
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * target text. No {@link BreakIterator}s are set to test for logical matches.
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param collator {@link RuleBasedCollator} that defines the language rules
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when argument target is null,
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            or of length 0
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see RuleBasedCollator
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, CharacterIterator target, RuleBasedCollator collator) {
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this(pattern, target, collator, null);
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules and
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break iterator rules defined in the argument locale to search for
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * argument pattern in the argument target text.
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param locale locale to use for language and break iterator rules
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when argument target is null,
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            or of length 0. ClassCastException thrown if the collator for
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the specified locale is not a RuleBasedCollator.
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, CharacterIterator target, Locale locale) {
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this(pattern, target, ULocale.forLocale(locale));
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules and
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break iterator rules defined in the argument locale to search for
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * argument pattern in the argument target text.
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See super class documentation for more details on the use of the target
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * text and {@link BreakIterator}.
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param locale locale to use for language and break iterator rules
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when argument target is null,
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            or of length 0. ClassCastException thrown if the collator for
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the specified locale is not a RuleBasedCollator.
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see BreakIterator
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see RuleBasedCollator
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see SearchIterator
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.2
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, CharacterIterator target, ULocale locale) {
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this(pattern, target, (RuleBasedCollator) Collator.getInstance(locale), null);
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator to use the language-specific rules and
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break iterator rules defined in the default locale to search for
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * argument pattern in the argument target text.
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern text to look for.
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target target text to search for pattern.
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when argument target is null,
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            or of length 0. ClassCastException thrown if the collator for
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the default locale is not a RuleBasedCollator.
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringSearch(String pattern, String target) {
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this(pattern, new StringCharacterIterator(target),
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (RuleBasedCollator) Collator.getInstance(), null);
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets the {@link RuleBasedCollator} used for the language rules.
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Since <tt>StringSearch</tt> depends on the returned {@link RuleBasedCollator}, any
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * changes to the {@link RuleBasedCollator} result should follow with a call to
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * either {@link #reset()} or {@link #setCollator(RuleBasedCollator)} to ensure the correct
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * search behavior.
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </p>
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return {@link RuleBasedCollator} used by this <tt>StringSearch</tt>
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see RuleBasedCollator
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setCollator
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public RuleBasedCollator getCollator() {
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return collator_;
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the {@link RuleBasedCollator} to be used for language-specific searching.
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator's position will not be changed by this method.
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param collator to use for this <tt>StringSearch</tt>
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException thrown when collator is null
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getCollator
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setCollator(RuleBasedCollator collator) {
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (collator == null) {
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("Collator can not be null");
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        collator_ = collator;
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ceMask_ = getMask(collator_.getStrength());
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ULocale collLocale = collator.getLocale(ULocale.VALID_LOCALE);
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.internalBreakIter_ = BreakIterator.getCharacterInstance(collLocale == null ? ULocale.ROOT : collLocale);
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.internalBreakIter_.setText((CharacterIterator)search_.text().clone());  // We need to create a clone
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        toShift_ = collator.isAlternateHandlingShifted();
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        variableTop_ = collator.getVariableTop();
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_ = new CollationElementIterator(pattern_.text_, collator);
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        utilIter_ = new CollationElementIterator(pattern_.text_, collator);
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // initialize() _after_ setting the iterators for the new collator.
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        initialize();
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Returns the pattern that this <tt>StringSearch</tt> is searching for.
 * @return the pattern searched for
 * @stable ICU 2.0
 */
public String getPattern() {
    // The pattern text is stored on the pattern_ holder object.
    return pattern_.text_;
}
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the pattern to search for.
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator's position will not be changed by this method.
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pattern for searching
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getPattern
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IllegalArgumentException thrown if pattern is null or of
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *               length 0
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setPattern(String pattern) {
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern == null || pattern.length() <= 0) {
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException(
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "Pattern to search for can not be null or of length 0");
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.text_ = pattern;
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        initialize();
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Determines whether canonical matches (option 1, as described in the
 * class documentation) is set.
 * See {@link #setCanonical(boolean)} for more information.
 * @see #setCanonical
 * @return true if canonical matches is set, false otherwise
 * @stable ICU 2.8
 */
//TODO: hoist this to SearchIterator
public boolean isCanonical() {
    // The flag lives on the shared search_ state object, not on this class.
    return search_.isCanonicalMatch_;
}
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Set the canonical match mode. See class documentation for details.
 * The default setting for this property is false.
 * @param allowCanonical flag indicating whether canonical matches are allowed
 * @see #isCanonical
 * @stable ICU 2.8
 */
//TODO: hoist this to SearchIterator
public void setCanonical(boolean allowCanonical) {
    // Only records the flag; handleNext()/handlePrevious() read it to choose
    // between the canonical and the exact matching routine.
    search_.isCanonicalMatch_ = allowCanonical;
}
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * {@inheritDoc}
 * @stable ICU 2.8
 */
@Override
public void setTarget(CharacterIterator text) {
    // Let the superclass process the new target first, then point the
    // collation element iterator used for searching at the same text.
    super.setTarget(text);
    textIter_.setText(text);
}
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getIndex() {
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = textIter_.getOffset();
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (isOutOfBounds(search_.beginIndex(), search_.endIndex(), result)) {
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return DONE;
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * {@inheritDoc}
 * @stable ICU 2.8
 */
@Override
public void setIndex(int position) {
    // Java porting note: This method is equivalent to setOffset() in ICU4C.
    // ICU4C SearchIterator::setOffset() is a pure virtual method, while
    // ICU4J SearchIterator.setIndex() is not an abstract method.

    // The superclass call runs first; the collation element iterator is only
    // moved once that call has returned.
    super.setIndex(position);
    textIter_.setOffset(position);
}
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void reset() {
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // reset is setting the attributes that are already in
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // string search, hence all attributes in the collator should
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // be retrieved without any problems
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean sameCollAttribute = true;
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ceMask;
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean shift;
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int varTop;
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // **** hack to deal w/ how processed CEs encode quaternary ****
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int newStrength = collator_.getStrength();
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((strength_ < Collator.QUATERNARY && newStrength >= Collator.QUATERNARY)
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || (strength_ >= Collator.QUATERNARY && newStrength < Collator.QUATERNARY)) {
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sameCollAttribute = false;
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        strength_ = collator_.getStrength();
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ceMask = getMask(strength_);
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (ceMask_ != ceMask) {
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ceMask_ = ceMask;
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sameCollAttribute = false;
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        shift = collator_.isAlternateHandlingShifted();
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (toShift_ != shift) {
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            toShift_ = shift;
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sameCollAttribute = false;
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        varTop = collator_.getVariableTop();
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (variableTop_ != varTop) {
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            variableTop_ = varTop;
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sameCollAttribute = false;
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (!sameCollAttribute) {
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            initialize();
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_.setText(search_.text());
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(0);
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.matchedIndex_ = DONE;
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isOverlap_ = false;
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isCanonicalMatch_ = false;
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isForwardSearching_ = true;
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = true;
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected int handleNext(int position) {
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ == 0) {
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.matchedIndex_ = search_.matchedIndex_ == DONE ?
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    getIndex() : search_.matchedIndex_ + 1;
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.setMatchedLength(0);
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(search_.matchedIndex_);
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ == search_.endIndex()) {
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.matchedIndex_ = DONE;
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedLength() <= 0) {
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the flipping direction issue has already been handled
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // in next()
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // for boundary check purposes. this will ensure that the
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // next match will not preceed the current offset
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // note search_.matchedIndex_ will always be set to something
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // in the code
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.matchedIndex_ = position - 1;
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(position);
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // ICU4C comment:
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if strsrch_->breakIter is always the same as m_breakiterator_
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // then we don't need to check the match boundaries here because
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // usearch_handleNextXXX will already have done it.
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.isCanonicalMatch_) {
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // *could* actually use exact here 'cause no extra accents allowed...
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                handleNextCanonical();
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                handleNextExact();
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ == DONE) {
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textIter_.setOffset(search_.endIndex());
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textIter_.setOffset(search_.matchedIndex_);
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return search_.matchedIndex_;
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return DONE;
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected int handlePrevious(int position) {
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ == 0) {
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.matchedIndex_ =
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    search_.matchedIndex_ == DONE ? getIndex() : search_.matchedIndex_;
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ == search_.beginIndex()) {
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setMatchNotFound();
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.matchedIndex_--;
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textIter_.setOffset(search_.matchedIndex_);
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.setMatchedLength(0);
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(position);
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.isCanonicalMatch_) {
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // *could* use exact match here since extra accents *not* allowed!
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                handlePreviousCanonical();
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                handlePreviousExact();
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.matchedIndex_;
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ------------------ Internal implementation code ---------------------------
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
// Initial size for internal arrays.
// NOTE(review): no use of this constant is visible in this part of the file -
// confirm which buffers it sizes.
private static final int INITIAL_ARRAY_SIZE_ = 256;

// *** Boyer-Moore ***
// private static final Normalizer2Impl nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
// private static final int LAST_BYTE_MASK_ = 0xff;
// private static final int SECOND_LAST_BYTE_SHIFT_ = 8;

// Bit fields of a 32-bit collation element; getMask() ORs these together
// according to the collation strength.
private static final int PRIMARYORDERMASK = 0xffff0000;   // upper 16 bits
private static final int SECONDARYORDERMASK = 0x0000ff00; // bits 8-15
private static final int TERTIARYORDERMASK = 0x000000ff;  // lowest 8 bits
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Getting the mask for collation strength
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param strength collation strength
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return collation element mask
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int getMask(int strength) {
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        switch (strength) {
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case Collator.PRIMARY:
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return PRIMARYORDERMASK;
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case Collator.SECONDARY:
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return SECONDARYORDERMASK | PRIMARYORDERMASK;
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        default:
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return TERTIARYORDERMASK | SECONDARYORDERMASK | PRIMARYORDERMASK;
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // *** Boyer-Moore ***
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final char getFCD(String str, int offset) {
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char ch = str.charAt(offset);
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (ch < 0x180) {
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (char) nfcImpl_.getFCD16FromBelow180(ch);
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (nfcImpl_.singleLeadMightHaveNonZeroFCD16(ch)) {
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!Character.isHighSurrogate(ch)) {
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return (char) nfcImpl_.getFCD16FromNormData(ch);
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char c2;
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (++offset < str.length() && Character.isLowSurrogate(c2 = str.charAt(offset))) {
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return (char) nfcImpl_.getFCD16FromNormData(Character.toCodePoint(ch, c2));
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 0;
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final char getFCD(int c) {
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (char)nfcImpl_.getFCD16(c);
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Getting the modified collation elements taking into account the collation
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * attributes.
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sourcece
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the modified collation element
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int getCE(int sourcece) {
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // note for tertiary we can't use the collator->tertiaryMask, that
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // is a preprocessed mask that takes into account case options. since
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we are only concerned with exact matches, we don't need that.
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sourcece &= ceMask_;
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (toShift_) {
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // alternate handling here, since only the 16 most significant digits
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // is only used, we can safely do a compare without masking
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if the ce is a variable, we mask and get only the primary values
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // no shifting to quartenary is required since all primary values
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // less than variabletop will need to be masked off anyway.
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (variableTop_ > sourcece) {
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (strength_ >= Collator.QUATERNARY) {
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sourcece &= PRIMARYORDERMASK;
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sourcece = CollationElementIterator.IGNORABLE;
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (strength_ >= Collator.QUATERNARY && sourcece == CollationElementIterator.IGNORABLE) {
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sourcece = 0xFFFF;
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return sourcece;
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Direct port of ICU4C static int32_t * addTouint32_tArray(...) in usearch.cpp.
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is used for appending a PCE to Pattern.PCE_ buffer. We probably should
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * implement this in Pattern class.
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destination target array
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset destination offset to add value
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destinationlength target array size
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param value to be added
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param increments incremental size expected
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return new destination array, destination if there was no new allocation
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int[] addToIntArray(int[] destination, int offset, int destinationlength,
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int value, int increments) {
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int newlength = destinationlength;
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset + 1 == newlength) {
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newlength += increments;
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int temp[] = new int[newlength];
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.arraycopy(destination, 0, temp, 0, offset);
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            destination = temp;
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        destination[offset] = value;
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return destination;
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Direct port of ICU4C static int64_t * addTouint64_tArray(...) in usearch.cpp.
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is used for appending a PCE to Pattern.PCE_ buffer. We probably should
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * implement this in Pattern class.
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destination target array
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset destination offset to add value
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destinationlength target array size
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param value to be added
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param increments incremental size expected
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return new destination array, destination if there was no new allocation
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long[] addToLongArray(long[] destination, int offset, int destinationlength,
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long value, int increments) {
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int newlength = destinationlength;
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset + 1 == newlength) {
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newlength += increments;
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long temp[] = new long[newlength];
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.arraycopy(destination, 0, temp, 0, offset);
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            destination = temp;
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        destination[offset] = value;
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return destination;
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializing the ce table for a pattern.
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Stores non-ignorable collation keys.
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Table size will be estimated by the size of the pattern text. Table
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * expansion will be perform as we go along. Adding 1 to ensure that the table
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * size definitely increases.
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return total number of expansions
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: We probably do not need Pattern CE table.
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int initializePatternCETable() {
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int[] cetable = new int[INITIAL_ARRAY_SIZE_];
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int cetablesize = cetable.length;
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patternlength = pattern_.text_.length();
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationElementIterator coleiter = utilIter_;
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (coleiter == null) {
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coleiter = new CollationElementIterator(pattern_.text_, collator_);
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            utilIter_ = coleiter;
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coleiter.setText(pattern_.text_);
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int offset = 0;
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = 0;
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ce;
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while ((ce = coleiter.next()) != CollationElementIterator.NULLORDER) {
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int newce = getCE(ce);
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (newce != CollationElementIterator.IGNORABLE /* 0 */) {
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int[] temp = addToIntArray(cetable, offset, cetablesize, newce,
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        patternlength - coleiter.getOffset() + 1);
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offset++;
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cetable = temp;
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result += (coleiter.getMaxExpansion(ce) - 1);
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        cetable[offset] = 0;
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.CE_ = cetable;
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.CELength_ = offset;
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializing the pce table for a pattern.
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Stores non-ignorable collation keys.
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Table size will be estimated by the size of the pattern text. Table
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * expansion will be perform as we go along. Adding 1 to ensure that the table
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * size definitely increases.
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return total number of expansions
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int initializePatternPCETable() {
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long[] pcetable = new long[INITIAL_ARRAY_SIZE_];
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int pcetablesize = pcetable.length;
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patternlength = pattern_.text_.length();
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationElementIterator coleiter = utilIter_;
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (coleiter == null) {
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coleiter = new CollationElementIterator(pattern_.text_, collator_);
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            utilIter_ = coleiter;
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coleiter.setText(pattern_.text_);
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int offset = 0;
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = 0;
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long pce;
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationPCE iter = new CollationPCE(coleiter);
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ** Should processed CEs be signed or unsigned?
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ** (the rest of the code in this file seems to play fast-and-loose with
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ** whether a CE is signed or unsigned. For example, look at routine above this one.)
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while ((pce = iter.nextProcessed(null)) != CollationPCE.PROCESSED_NULLORDER) {
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long[] temp = addToLongArray(pcetable, offset, pcetablesize, pce, patternlength - coleiter.getOffset() + 1);
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset++;
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pcetable = temp;
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pcetable[offset] = 0;
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.PCE_ = pcetable;
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.PCELength_ = offset;
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: This method only triggers initializePatternCETable(), which is probably no
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //      longer needed.
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int initializePattern() {
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Since the strength is primary, accents are ignored in the pattern.
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // *** Boyer-Moore ***
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (strength_ == Collator.PRIMARY) {
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.hasPrefixAccents_ = false;
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.hasSuffixAccents_ = false;
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.hasPrefixAccents_ = (getFCD(pattern_.text_, 0) >>> SECOND_LAST_BYTE_SHIFT_) != 0;
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.hasSuffixAccents_ = (getFCD(pattern_.text_.codePointBefore(pattern_.text_.length())) & LAST_BYTE_MASK_) != 0;
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        */
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pattern_.PCE_ = null;
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // since intializePattern is an internal method status is a success.
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return initializePatternCETable();
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // *** Boyer-Moore ***
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     private final void setShiftTable(char shift[],
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         char backshift[],
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         int cetable[], int cesize,
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         int expansionsize,
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         int defaultforward,
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                         int defaultbackward) {
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         // No implementation
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     }
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: This method only triggers initializePattern(), which is probably no
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //      longer needed.
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void initialize() {
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* int expandlength = */ initializePattern();
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // *** Boyer-Moore ***
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ > 0) {
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int cesize = pattern_.CELength_;
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int minlength = cesize > expandlength ? cesize - expandlength : 1;
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pattern_.defaultShiftSize_ = minlength;
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setShiftTable(pattern_.shift_, pattern_.backShift_, pattern_.CE_, cesize,
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    expandlength, minlength, minlength);
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return pattern_.defaultShiftSize_;
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        */
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void setMatchNotFound() {
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super.setMatchNotFound();
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // SearchIterator#setMatchNotFound() does following:
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //      search_.matchedIndex_ = DONE;
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //      search_.setMatchedLength(0);
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.isForwardSearching_) {
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(search_.text().getEndIndex());
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(0);
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Checks if the offset runs out of the text string range
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param textstart offset of the first character in the range
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param textlimit limit offset of the text string range
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset to test
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if offset is out of bounds, false otherwise
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean isOutOfBounds(int textstart, int textlimit, int offset) {
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return offset < textstart || offset > textlimit;
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Checks for identical match
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param start offset of possible match
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param end offset of possible match
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return TRUE if identical match is found
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean checkIdentical(int start, int end) {
8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (strength_ != Collator.IDENTICAL) {
8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: We could use Normalizer::compare() or similar, but for short strings
8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // which may not be in FCD it might be faster to just NFD them.
8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String textstr = getString(targetText, start, end - start);
8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Normalizer.quickCheck(textstr, Normalizer.NFD, 0) == Normalizer.NO) {
8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textstr = Normalizer.decompose(textstr, false);
8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String patternstr = pattern_.text_;
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Normalizer.quickCheck(patternstr, Normalizer.NFD, 0) == Normalizer.NO) {
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            patternstr = Normalizer.decompose(patternstr, false);
8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return textstr.equals(patternstr);
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean initTextProcessedIter() {
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (textProcessedIter_ == null) {
8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textProcessedIter_ = new CollationPCE(textIter_);
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textProcessedIter_.init(textIter_);
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Find the next break boundary after startIndex. If the UStringSearch object
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * has an external break iterator, use that. Otherwise use the internal character
9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break iterator.
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int nextBoundaryAfter(int startIndex) {
9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIterator breakiterator = search_.breakIter();
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (breakiterator == null) {
9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            breakiterator = search_.internalBreakIter_;
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (breakiterator != null) {
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return breakiterator.following(startIndex);
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return startIndex;
9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns TRUE if index is on a break boundary. If the UStringSearch
9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * has an external break iterator, test using that, otherwise test
9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * using the internal character break iterator.
9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isBreakBoundary(int index) {
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIterator breakiterator = search_.breakIter();
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (breakiterator == null) {
9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            breakiterator = search_.internalBreakIter_;
9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (breakiterator != null && breakiterator.isBoundary(index));
9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note: Followings are corresponding to UCompareCEsResult enum
9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CE_MATCH = -1;
9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CE_NO_MATCH = 0;
9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CE_SKIP_TARG = 1;
9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CE_SKIP_PATN = 2;
9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int CE_LEVEL2_BASE = 0x00000005;
9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int CE_LEVEL3_BASE = 0x00050000;
9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int compareCE64s(long targCE, long patCE, ElementComparisonType compareType) {
9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (targCE == patCE) {
9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return CE_MATCH;
9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (compareType == ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return CE_NO_MATCH;
9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long targCEshifted = targCE >>> 32;
9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long patCEshifted = patCE >>> 32;
9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long mask;
9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mask = 0xFFFF0000L;
9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targLev1 = (int)(targCEshifted & mask);
9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patLev1 = (int)(patCEshifted & mask);
9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (targLev1 != patLev1) {
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (targLev1 == 0) {
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CE_SKIP_TARG;
9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (patLev1 == 0
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    && compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD) {
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CE_SKIP_PATN;
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return CE_NO_MATCH;
9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mask = 0x0000FFFFL;
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targLev2 = (int)(targCEshifted & mask);
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patLev2 = (int)(patCEshifted & mask);
9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (targLev2 != patLev2) {
9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (targLev2 == 0) {
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CE_SKIP_TARG;
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (patLev2 == 0
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    && compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD) {
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CE_SKIP_PATN;
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (patLev2 == CE_LEVEL2_BASE ||
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD &&
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targLev2 == CE_LEVEL2_BASE)) ? CE_MATCH : CE_NO_MATCH;
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mask = 0xFFFF0000L;
9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targLev3 = (int)(targCE & mask);
9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patLev3 = (int)(patCE & mask);
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (targLev3 != patLev3) {
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (patLev3 == CE_LEVEL3_BASE ||
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (compareType == ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD &&
9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targLev3 == CE_LEVEL3_BASE) )? CE_MATCH: CE_NO_MATCH;
9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return CE_MATCH;
9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * An object used for receiving matched index in search() and
9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * searchBackwards().
9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class Match {
10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int start_ = -1;
10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limit_ = -1;
10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean search(int startIdx, Match m) {
10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Input parameter sanity check.
10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ == 0
10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || startIdx < search_.beginIndex()
10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || startIdx > search_.endIndex()) {
10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("search(" + startIdx + ", m) - expected position to be between " +
10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    search_.beginIndex() + " and " + search_.endIndex());
10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.PCE_ == null) {
10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            initializePatternPCETable();
10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        textIter_.setOffset(startIdx);
10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEBuffer ceb = new CEBuffer(this);
10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targetIx = 0;
10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI targetCEI = null;
10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patIx;
10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean found;
10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int mStart = -1;
10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int mLimit = -1;
10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int minLimit;
10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int maxLimit;
10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Outer loop moves over match starting positions in the
10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //      target CE space.
10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Here we see the target as a sequence of collation elements, resulting from the following:
10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 1. Target characters were decomposed, and (if appropriate) other compressions and expansions are applied
10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    (for example, digraphs such as IJ may be broken into two characters).
10367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 2. An int64_t CE weight is determined for each resulting unit (high 16 bits are primary strength, next
10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    16 bits are secondary, next 16 (the high 16 bits of the low 32-bit half) are tertiary. Any of these
10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    fields that are for strengths below that of the collator are set to 0. If this makes the int64_t
10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    CE weight 0 (as for a combining diacritic with secondary weight when the collator strentgh is primary),
10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    then the CE is deleted, so the following code sees only CEs that are relevant.
10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // For each CE, the lowIndex and highIndex correspond to where this CE begins and ends in the original text.
10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If lowIndex==highIndex, either the CE resulted from an expansion/decomposition of one of the original text
10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // characters, or the CE marks the limit of the target text (in which case the CE weight is UCOL_PROCESSED_NULLORDER).
10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (targetIx = 0; ; targetIx++) {
10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            found = true;
10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Inner loop checks for a match beginning at each
10477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // position from the outer loop.
10487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int targetIxOffset = 0;
10497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long patCE = 0;
10507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer
10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // (compared to the last CE fetched for the previous targetIx value) as we need to go
10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for this targetIx value, so if it is non-NULL then other ceb.get calls should be OK.
10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CEI firstCEI = ceb.get(targetIx);
10547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (firstCEI == null) {
10557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new ICUException("CEBuffer.get(" + targetIx + ") returned null.");
10567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (patIx = 0; patIx < pattern_.PCELength_; patIx++) {
10597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                patCE = pattern_.PCE_[patIx];
10607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                targetCEI = ceb.get(targetIx + patIx + targetIxOffset);
10617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Compare CE from target string with CE from the pattern.
10627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
10637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // which will fail the compare, below.
10647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int ceMatch = compareCE64s(targetCEI.ce_, patCE, search_.elementComparisonType_);
10657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ceMatch == CE_NO_MATCH) {
10667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    found = false;
10677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
10687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if (ceMatch > CE_NO_MATCH) {
10697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (ceMatch == CE_SKIP_TARG) {
10707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // redo with same patCE, next targCE
10717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        patIx--;
10727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targetIxOffset++;
10737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else { // ceMatch == CE_SKIP_PATN
10747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // redo with same targCE, next patCE
10757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targetIxOffset--;
10767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
10777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
10787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            targetIxOffset += pattern_.PCELength_; // this is now the offset in target CE space to end of the match so far
10807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!found && ((targetCEI == null) || (targetCEI.ce_ != CollationPCE.PROCESSED_NULLORDER))) {
10827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No match at this targetIx.  Try again at the next.
10837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
10847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!found) {
10877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No match at all, we have run off the end of the target text.
10887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
10897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We have found a match in CE space.
10927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Now determine the bounds in string index space.
10937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // There still is a chance of match failure if the CE range not correspond to
10947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // an acceptable character range.
10957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //
10967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CEI lastCEI = ceb.get(targetIx + targetIxOffset -1);
10977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mStart = firstCEI.lowIndex_;
10997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minLimit = lastCEI.lowIndex_;
11007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Look at the CE following the match.  If it is UCOL_NULLORDER the match
11027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // extended to the end of input, and the match is good.
11037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Look at the high and low indices of the CE following the match. If
11057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // they are the same it means one of two things:
11067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //    1. The match extended to the last CE from the target text, which is OK, or
11077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //    2. The last CE that was part of the match is in an expansion that extends
11087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //       to the first CE after the match. In this case, we reject the match.
11097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CEI nextCEI = null;
11107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.elementComparisonType_ == ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
11117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                nextCEI = ceb.get(targetIx + targetIxOffset);
11127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                maxLimit = nextCEI.lowIndex_;
11137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (nextCEI.lowIndex_ == nextCEI.highIndex_ && nextCEI.ce_ != CollationPCE.PROCESSED_NULLORDER) {
11147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    found = false;
11157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
11177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (;; ++targetIxOffset) {
11187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    nextCEI = ceb.get(targetIx + targetIxOffset);
11197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    maxLimit = nextCEI.lowIndex_;
11207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // If we are at the end of the target too, match succeeds
11217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (nextCEI.ce_ == CollationPCE.PROCESSED_NULLORDER) {
11227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
11237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // As long as the next CE has primary weight of 0,
11257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // it is part of the last target element matched by the pattern;
11267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // make sure it can be part of a match with the last patCE
11277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if ((((nextCEI.ce_) >>> 32) & 0xFFFF0000L) == 0) {
11287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        int ceMatch = compareCE64s(nextCEI.ce_, patCE, search_.elementComparisonType_);
11297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (ceMatch == CE_NO_MATCH || ceMatch == CE_SKIP_PATN ) {
11307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            found = false;
11317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            break;
11327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
11337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // If lowIndex == highIndex, this target CE is part of an expansion of the last matched
11347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // target element, but it has non-zero primary weight => match fails
11357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if ( nextCEI.lowIndex_ == nextCEI.highIndex_ ) {
11367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        found = false;
11377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
11387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Else the target CE is not part of an expansion of the last matched element, match succeeds
11397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
11407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
11417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Check for the start of the match being within a combining sequence.
11467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This can happen if the pattern itself begins with a combining char, and
11477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the match found combining marks in the target text that were attached
11487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // to something else.
11497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This type of match should be rejected for not completely consuming a
11507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // combining sequence.
11517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!isBreakBoundary(mStart)) {
11527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
11537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Check for the start of the match being within an Collation Element Expansion,
11567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // meaning that the first char of the match is only partially matched.
11577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // With expansions, the first CE will report the index of the source
11587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // character, and all subsequent (expansions) CEs will report the source index of the
11597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // _following_ character.
11607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int secondIx = firstCEI.highIndex_;
11617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (mStart == secondIx) {
11627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
11637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1165bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // Allow matches to end in the middle of a grapheme cluster if the following
1166bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // conditions are met; this is needed to make prefix search work properly in
1167bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // Indic, see #11750
1168bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // * the default breakIter is being used
1169bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // * the next collation element after this combining sequence
1170bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            //   - has non-zero primary weight
1171bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            //   - corresponds to a separate character following the one at end of the current match
1172bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            //   (the second of these conditions, and perhaps both, may be redundant given the
1173bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            //   subsequent check for normalization boundary; however they are likely much faster
1174bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            //   tests in any case)
1175bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // * the match limit is a normalization boundary
1176bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            boolean allowMidclusterMatch =
1177bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                            breakIterator == null &&
11782d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            (((nextCEI.ce_) >>> 32) & 0xFFFF0000L) != 0 &&
1179bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                            maxLimit >= lastCEI.highIndex_ && nextCEI.highIndex_ > maxLimit &&
1180bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                            (nfd_.hasBoundaryBefore(codePointAt(targetText, maxLimit)) ||
1181bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                    nfd_.hasBoundaryAfter(codePointBefore(targetText, maxLimit)));
1182bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1183bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // If those conditions are met, then:
1184bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // * do NOT advance the candidate match limit (mLimit) to a break boundary; however
1185bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            //   the match limit may be backed off to a previous break boundary. This handles
1186bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            //   cases in which mLimit includes target characters that are ignorable with current
1187bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            //   settings (such as space) and which extend beyond the pattern match.
1188bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // * do NOT require that end of the combining sequence not extend beyond the match in CE space
1189bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // * do NOT require that match limit be on a breakIter boundary
1190bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
11917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Advance the match end position to the first acceptable match boundary.
11927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This advances the index over any combining characters.
11937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mLimit = maxLimit;
11947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (minLimit < maxLimit) {
11957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // When the last CE's low index is same with its high index, the CE is likely
11967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // a part of expansion. In this case, the index is located just after the
11977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // character corresponding to the CEs compared above. If the index is right
11987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // at the break boundary, move the position to the next boundary will result
11997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // incorrect match length when there are ignorable characters exist between
12007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the position and the next character produces CE(s). See ticket#8482.
12017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (minLimit == lastCEI.highIndex_ && isBreakBoundary(minLimit)) {
12027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    mLimit = minLimit;
12037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
12047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int nba = nextBoundaryAfter(minLimit);
1205bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // Note that we can have nba < maxLimit && nba >= minLImit, in which
1206bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // case we want to set mLimit to nba regardless of allowMidclusterMatch
1207bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // (i.e. we back off mLimit to the previous breakIterator boundary).
1208bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (nba >= lastCEI.highIndex_ && (!allowMidclusterMatch || nba < maxLimit)) {
12097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        mLimit = nba;
12107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
12117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
12127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1214bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (!allowMidclusterMatch) {
1215bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // If advancing to the end of a combining sequence in character indexing space
1216bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // advanced us beyond the end of the match in CE space, reject this match.
1217bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (mLimit > maxLimit) {
1218bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    found = false;
1219bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
12207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1221bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (!isBreakBoundary(mLimit)) {
1222bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    found = false;
1223bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
12247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!checkIdentical(mStart, mLimit)) {
12277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
12287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (found) {
12317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
12327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // All Done.  Store back the match bounds to the caller.
12367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
12377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (found == false) {
12387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mLimit = -1;
12397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mStart = -1;
12407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (m != null) {
12437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            m.start_ = mStart;
12447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            m.limit_ = mLimit;
12457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return found;
12487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1250bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private static int codePointAt(CharacterIterator iter, int index) {
1251bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int currentIterIndex = iter.getIndex();
1252bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        char codeUnit = iter.setIndex(index);
1253bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int cp = codeUnit;
1254bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (Character.isHighSurrogate(codeUnit)) {
1255bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            char nextUnit = iter.next();
1256bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (Character.isLowSurrogate(nextUnit)) {
1257bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                cp = Character.toCodePoint(codeUnit, nextUnit);
1258bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
1259bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
1260bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        iter.setIndex(currentIterIndex);  // restore iter position
1261bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return cp;
1262bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
1263bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1264bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private static int codePointBefore(CharacterIterator iter, int index) {
1265bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int currentIterIndex = iter.getIndex();
1266bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        iter.setIndex(index);
1267bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        char codeUnit = iter.previous();
1268bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int cp = codeUnit;
1269bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (Character.isLowSurrogate(codeUnit)) {
1270bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            char prevUnit = iter.previous();
1271bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (Character.isHighSurrogate(prevUnit)) {
1272bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                cp = Character.toCodePoint(prevUnit, codeUnit);
1273bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
1274bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
1275bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        iter.setIndex(currentIterIndex);  // restore iter position
1276bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return cp;
1277bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
1278bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
12797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean searchBackwards(int startIdx, Match m) {
12807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //ICU4C_TODO comment:  reject search patterns beginning with a combining char.
12817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Input parameter sanity check.
12837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.CELength_ == 0
12847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || startIdx < search_.beginIndex()
12857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || startIdx > search_.endIndex()) {
12867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("searchBackwards(" + startIdx + ", m) - expected position to be between " +
12877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    search_.beginIndex() + " and " + search_.endIndex());
12887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (pattern_.PCE_ == null) {
12917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            initializePatternPCETable();
12927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEBuffer ceb = new CEBuffer(this);
12957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int targetIx = 0;
12967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
12987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Pre-load the buffer with the CE's for the grapheme
12997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * after our starting position so that we're sure that
13007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * we can look at the CE following the match when we
13017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * check the match boundaries.
13027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
13037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * This will also pre-fetch the first CE that we'll
13047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * consider for the match.
13057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
13067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (startIdx < search_.endIndex()) {
13077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BreakIterator bi = search_.internalBreakIter_;
13087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int next = bi.following(startIdx);
13097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(next);
13117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (targetIx = 0; ; targetIx++) {
13137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ceb.getPrevious(targetIx).lowIndex_ < startIdx) {
13147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
13157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
13187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textIter_.setOffset(startIdx);
13197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI targetCEI = null;
13227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int patIx;
13237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean found;
13247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limitIx = targetIx;
13267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int mStart = -1;
13277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int mLimit = -1;
13287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int minLimit;
13297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int maxLimit;
13307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Outer loop moves over match starting positions in the
13327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //      target CE space.
13337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Here, targetIx values increase toward the beginning of the base text (i.e. we get the text CEs in reverse order).
13347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // But  patIx is 0 at the beginning of the pattern and increases toward the end.
13357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // So this loop performs a comparison starting with the end of pattern, and prcessd toward the beginning of the pattern
13367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and the beginning of the base text.
13377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (targetIx = limitIx; ; targetIx++) {
13387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            found = true;
13397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // For targetIx > limitIx, this ceb.getPrevious gets a CE that is as far back in the ring buffer
13407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // (compared to the last CE fetched for the previous targetIx value) as we need to go
13417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for this targetIx value, so if it is non-NULL then other ceb.getPrevious calls should be OK.
13427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CEI lastCEI = ceb.getPrevious(targetIx);
13437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (lastCEI == null) {
13447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new ICUException("CEBuffer.getPrevious(" + targetIx + ") returned null.");
13457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Inner loop checks for a match beginning at each
13477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // position from the outer loop.
13487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int targetIxOffset = 0;
13497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (patIx = pattern_.PCELength_ - 1; patIx >= 0; patIx--) {
13507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long patCE = pattern_.PCE_[patIx];
13517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                targetCEI = ceb.getPrevious(targetIx + pattern_.PCELength_ - 1 - patIx + targetIxOffset);
13537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Compare CE from target string with CE from the pattern.
13547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
13557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // which will fail the compare, below.
13567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int ceMatch = compareCE64s(targetCEI.ce_, patCE, search_.elementComparisonType_);
13577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ceMatch == CE_NO_MATCH) {
13587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    found = false;
13597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
13607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if (ceMatch > CE_NO_MATCH) {
13617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (ceMatch == CE_SKIP_TARG) {
13627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // redo with same patCE, next targCE
13637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        patIx++;
13647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targetIxOffset++;
13657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else { // ceMatch == CE_SKIP_PATN
13667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // redo with same targCE, next patCE
13677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        targetIxOffset--;
13687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
13697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!found && ((targetCEI == null) || (targetCEI.ce_ != CollationPCE.PROCESSED_NULLORDER))) {
13737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No match at this targetIx.  Try again at the next.
13747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
13757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!found) {
13787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No match at all, we have run off the end of the target text.
13797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
13807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We have found a match in CE space.
13837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Now determine the bounds in string index space.
13847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // There still is a chance of match failure if the CE range not correspond to
13857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // an acceptable character range.
13867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //
13877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CEI firstCEI = ceb.getPrevious(targetIx + pattern_.PCELength_ - 1 + targetIxOffset);
13887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mStart = firstCEI.lowIndex_;
13897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Check for the start of the match being within a combining sequence.
13917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This can happen if the pattern itself begins with a combining char, and
13927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the match found combining marks in the target text that were attached
13937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // to something else.
13947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This type of match should be rejected for not completely consuming a
13957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // combining sequence.
13967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!isBreakBoundary(mStart)) {
13977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
13987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Look at the high index of the first CE in the match. If it's the same as the
14017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // low index, the first CE in the match is in the middle of an expansion.
14027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (mStart == firstCEI.highIndex_) {
14037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
14047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minLimit = lastCEI.lowIndex_;
14077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (targetIx > 0) {
14097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Look at the CE following the match.  If it is UCOL_NULLORDER the match
14107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // extended to the end of input, and the match is good.
14117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Look at the high and low indices of the CE following the match. If
14137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // they are the same it means one of two things:
14147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    1. The match extended to the last CE from the target text, which is OK, or
14157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    2. The last CE that was part of the match is in an expansion that extends
14167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //       to the first CE after the match. In this case, we reject the match.
14177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                CEI nextCEI  = ceb.getPrevious(targetIx - 1);
14187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (nextCEI.lowIndex_ == nextCEI.highIndex_ && nextCEI.ce_ != CollationPCE.PROCESSED_NULLORDER) {
14207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    found = false;
14217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                mLimit = maxLimit = nextCEI.lowIndex_;
14247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1425bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // Allow matches to end in the middle of a grapheme cluster if the following
1426bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // conditions are met; this is needed to make prefix search work properly in
1427bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // Indic, see #11750
1428bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * the default breakIter is being used
1429bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * the next collation element after this combining sequence
1430bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   - has non-zero primary weight
1431bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   - corresponds to a separate character following the one at end of the current match
1432bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   (the second of these conditions, and perhaps both, may be redundant given the
1433bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   subsequent check for normalization boundary; however they are likely much faster
1434bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   tests in any case)
1435bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * the match limit is a normalization boundary
1436bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                boolean allowMidclusterMatch =
1437bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                breakIterator == null &&
14382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                                (((nextCEI.ce_) >>> 32) & 0xFFFF0000L) != 0 &&
1439bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                maxLimit >= lastCEI.highIndex_ && nextCEI.highIndex_ > maxLimit &&
1440bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                (nfd_.hasBoundaryBefore(codePointAt(targetText, maxLimit)) ||
1441bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                                        nfd_.hasBoundaryAfter(codePointBefore(targetText, maxLimit)));
1442bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
1443bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // If those conditions are met, then:
1444bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * do NOT advance the candidate match limit (mLimit) to a break boundary; however
1445bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   the match limit may be backed off to a previous break boundary. This handles
1446bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   cases in which mLimit includes target characters that are ignorable with current
1447bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                //   settings (such as space) and which extend beyond the pattern match.
1448bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * do NOT require that end of the combining sequence not extend beyond the match in CE space
1449bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // * do NOT require that match limit be on a breakIter boundary
1450bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
14517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Advance the match end position to the first acceptable match boundary.
14527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // This advances the index over any combining charcters.
14537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (minLimit < maxLimit) {
14547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int nba = nextBoundaryAfter(minLimit);
1455bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // Note that we can have nba < maxLimit && nba >= minLImit, in which
1456bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // case we want to set mLimit to nba regardless of allowMidclusterMatch
1457bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // (i.e. we back off mLimit to the previous breakIterator boundary).
1458bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (nba >= lastCEI.highIndex_ && (!allowMidclusterMatch || nba < maxLimit)) {
14597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        mLimit = nba;
14607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
14617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1463bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (!allowMidclusterMatch) {
1464bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // If advancing to the end of a combining sequence in character indexing space
1465bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // advanced us beyond the end of the match in CE space, reject this match.
1466bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (mLimit > maxLimit) {
1467bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        found = false;
1468bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    }
14697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1470bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    // Make sure the end of the match is on a break boundary
1471bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (!isBreakBoundary(mLimit)) {
1472bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        found = false;
1473bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    }
14747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
14777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No non-ignorable CEs after this point.
14787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The maximum position is detected by boundary after
14797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the last non-ignorable CE. Combining sequence
14807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // across the start index will be truncated.
14817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int nba = nextBoundaryAfter(minLimit);
14827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx;
14837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!checkIdentical(mStart, mLimit)) {
14867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                found = false;
14877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (found) {
14907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
14917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // All Done.  Store back the match bounds to the caller.
14957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
14967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (found == false) {
14977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mLimit = -1;
14987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mStart = -1;
14997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (m != null) {
15027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            m.start_ = mStart;
15037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            m.limit_ = mLimit;
15047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return found;
15077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note:
15107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
15117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ICU4C usearch_handleNextExact() is identical to usearch_handleNextCanonical()
15127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // for the linear search implementation. The differences are addressed in search().
15137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
15147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handleNextExact() {
15157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handleNextCommonImpl();
15167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handleNextCanonical() {
15197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handleNextCommonImpl();
15207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handleNextCommonImpl() {
15237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int textOffset = textIter_.getOffset();
15247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Match match = new Match();
15257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search(textOffset, match)) {
15277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.matchedIndex_ = match.start_;
15287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.setMatchedLength(match.limit_ - match.start_);
15297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
15307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
15317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setMatchNotFound();
15327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
15337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note:
15377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
15387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ICU4C usearch_handlePreviousExact() is identical to usearch_handlePreviousCanonical()
15397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // for the linear search implementation. The differences are addressed in searchBackwards().
15407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
15417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handlePreviousExact() {
15427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handlePreviousCommonImpl();
15437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handlePreviousCanonical() {
15467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handlePreviousCommonImpl();
15477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean handlePreviousCommonImpl() {
15507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int textOffset;
15517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.isOverlap_) {
15537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ != DONE) {
15547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textOffset = search_.matchedIndex_ + search_.matchedLength() - 1;
15557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
15567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // move the start position at the end of possible match
15577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                initializePatternPCETable();
15587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (!initTextProcessedIter()) {
15597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setMatchNotFound();
15607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
15617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int nPCEs = 0; nPCEs < pattern_.PCELength_ - 1; nPCEs++) {
15637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    long pce = textProcessedIter_.nextProcessed(null);
15647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (pce == CollationPCE.PROCESSED_NULLORDER) {
15657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // at the end of the text
15667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
15677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
15687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                textOffset = textIter_.getOffset();
15707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
15717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
15727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            textOffset = textIter_.getOffset();
15737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Match match = new Match();
15767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (searchBackwards(textOffset, match)) {
15777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.matchedIndex_ = match.start_;
15787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.setMatchedLength(match.limit_ - match.start_);
15797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
15807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
15817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setMatchNotFound();
15827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
15837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets a substring out of a CharacterIterator
15887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java porting note: Not available in ICU4C
15907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param text CharacterIterator
15927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param start start offset
15937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param length of substring
15947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return substring from text starting at start and length length
15957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final String getString(CharacterIterator text, int start, int length) {
15977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder result = new StringBuilder(length);
15987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int offset = text.getIndex();
15997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setIndex(start);
16007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < length; i++) {
16017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.append(text.current());
16027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.next();
16037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setIndex(offset);
16057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result.toString();
16067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java port of ICU4C struct UPattern (usrchimp.h)
16107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class Pattern {
16127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Pattern string */
16137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String text_;
16147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long[] PCE_;
16167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int PCELength_ = 0;
16177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: We probably do not need CE_ / CELength_
16197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        @SuppressWarnings("unused")
16207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int[] CE_;
16217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int CELength_ = 0;
16227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // *** Boyer-Moore ***
16247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // boolean hasPrefixAccents_ = false;
16257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // boolean hasSuffixAccents_ = false;
16267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // int defaultShiftSize_;
16277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // char[] shift_;
16287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // char[] backShift_;
16297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Pattern(String pattern) {
16317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text_ = pattern;
16327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java port of ICU4C UCollationPCE (usrchimp.h)
16377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class CollationPCE {
16397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final long PROCESSED_NULLORDER = -1;
16407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int DEFAULT_BUFFER_SIZE = 16;
16427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int BUFFER_GROW = 8;
16437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: PRIMARYORDERMASK is also duplicated in StringSearch class
16457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int PRIMARYORDERMASK = 0xffff0000;
16467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int CONTINUATION_MARKER = 0xc0;
16477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private PCEBuffer pceBuffer_ = new PCEBuffer();
16497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private CollationElementIterator cei_;
16507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int strength_;
16517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private boolean toShift_;
16527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private boolean isShifted_;
16537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int variableTop_;
16547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CollationPCE(CollationElementIterator iter) {
16567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            init(iter);
16577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void init(CollationElementIterator iter) {
16607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cei_ = iter;
16617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            init(iter.getRuleBasedCollator());
16627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private void init(RuleBasedCollator coll) {
16657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength_ = coll.getStrength();
16667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            toShift_ = coll.isAlternateHandlingShifted();
16677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isShifted_ = false;
16687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            variableTop_ = coll.getVariableTop();
16697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        @SuppressWarnings("fallthrough")
16727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private long processCE(int ce) {
16737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
16747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This is clean, but somewhat slow...
16767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We could apply the mask to ce and then
16777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // just get all three orders...
16787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            switch (strength_) {
16797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            default:
16807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tertiary = CollationElementIterator.tertiaryOrder(ce);
16817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* note fall-through */
16827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collator.SECONDARY:
16847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                secondary = CollationElementIterator.secondaryOrder(ce);
16857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* note fall-through */
16867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collator.PRIMARY:
16887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                primary = CollationElementIterator.primaryOrder(ce);
16897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
16907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** This should probably handle continuations too. ****
16927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** That means that we need 24 bits for the primary ****
16937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** instead of the 16 that we're currently using. ****
16947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** So we can lay out the 64 bits as: 24.12.12.16. ****
16957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** Another complication with continuations is that ****
16967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** the *second* CE is marked as a continuation, so ****
16977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** we always have to peek ahead to know how long ****
16987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // **** the primary is... ****
16997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if ((toShift_ && variableTop_ > ce && primary != 0) || (isShifted_ && primary == 0)) {
17007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (primary == 0) {
17027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return CollationElementIterator.IGNORABLE;
17037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (strength_ >= Collator.QUATERNARY) {
17067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    quaternary = primary;
17077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                primary = secondary = tertiary = 0;
17107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                isShifted_ = true;
17117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
17127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (strength_ >= Collator.QUATERNARY) {
17137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    quaternary = 0xFFFF;
17147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                isShifted_ = false;
17177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
17187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
17207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
17237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Get the processed ordering priority of the next collation element in the text.
17247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * A single character may contain more than one collation element.
17257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
17267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Note: This is equivalent to
17277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * UCollationPCE::nextProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
17287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
17297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param range receiving the iterator index before/after fetching the CE.
17307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @return The next collation elements ordering, otherwise returns PROCESSED_NULLORDER
17317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *         if an error has occurred or if the end of string has been reached
17327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
17337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public long nextProcessed(Range range) {
17347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long result = CollationElementIterator.IGNORABLE;
17357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int low = 0, high = 0;
17367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pceBuffer_.reset();
17387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            do {
17407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                low = cei_.getOffset();
17417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int ce = cei_.next();
17427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                high = cei_.getOffset();
17437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ce == CollationElementIterator.NULLORDER) {
17457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     result = PROCESSED_NULLORDER;
17467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     break;
17477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = processCE(ce);
17507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } while (result == CollationElementIterator.IGNORABLE);
17517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (range != null) {
17537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                range.ixLow_ = low;
17547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                range.ixHigh_ = high;
17557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
17567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
17587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
17617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Get the processed ordering priority of the previous collation element in the text.
17627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * A single character may contain more than one collation element.
17637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
17647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Note: This is equivalent to
17657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * UCollationPCE::previousProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
17667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
17677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param range receiving the iterator index before/after fetching the CE.
17687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @return The previous collation elements ordering, otherwise returns
17697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *         PROCESSED_NULLORDER if an error has occurred or if the start of
17707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *         string has been reached.
17717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
17727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public long previousProcessed(Range range) {
17737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long result = CollationElementIterator.IGNORABLE;
17747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int low = 0, high = 0;
17757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // pceBuffer_.reset();
17777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (pceBuffer_.empty()) {
17797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // buffer raw CEs up to non-ignorable primary
17807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                RCEBuffer rceb = new RCEBuffer();
17817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int ce;
17827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean finish = false;
17847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // **** do we need to reset rceb, or will it always be empty at this point ****
17867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
17877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    high = cei_.getOffset();
17887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ce = cei_.previous();
17897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    low = cei_.getOffset();
17907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (ce == CollationElementIterator.NULLORDER) {
17927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (!rceb.empty()) {
17937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            break;
17947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
17957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        finish = true;
17977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
17987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
17997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    rceb.put(ce, low, high);
18017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } while ((ce & PRIMARYORDERMASK) == 0 || isContinuation(ce));
18027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (finish) {
18047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
18057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // process the raw CEs
18087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (!rceb.empty()) {
18097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    RCEI rcei = rceb.get();
18107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    result = processCE(rcei.ce_);
18127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (result != CollationElementIterator.IGNORABLE) {
18147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        pceBuffer_.put(result, rcei.low_, rcei.high_);
18157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
18167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (pceBuffer_.empty()) {
18207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // **** Is -1 the right value for ixLow, ixHigh? ****
18217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (range != null) {
18227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    range.ixLow_ = -1;
18237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    range.ixHigh_ = -1;
18247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return CollationElementIterator.NULLORDER;
18267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            PCEI pcei = pceBuffer_.get();
18297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (range != null) {
18317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                range.ixLow_ = pcei.low_;
18327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                range.ixHigh_ = pcei.high_;
18337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return pcei.ce_;
18367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static boolean isContinuation(int ce) {
18397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return ((ce & CONTINUATION_MARKER) == CONTINUATION_MARKER);
18407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final class Range {
18437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int ixLow_;
18447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int ixHigh_;
18457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Processed collation element buffer stuff ported from ICU4C ucoleitr.cpp */
18487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final class PCEI {
18497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long ce_;
18507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int low_;
18517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int high_;
18527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final class PCEBuffer {
18557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private PCEI[] buffer_ = new PCEI[DEFAULT_BUFFER_SIZE];
18567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private int bufferIndex_ = 0;
18577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            void reset() {
18597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bufferIndex_ = 0;
18607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean empty() {
18637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return bufferIndex_ <= 0;
18647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            void put(long ce, int ixLow, int ixHigh)
18677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            {
18687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (bufferIndex_ >= buffer_.length) {
18697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    PCEI[] newBuffer = new PCEI[buffer_.length + BUFFER_GROW];
18707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.arraycopy(buffer_, 0, newBuffer, 0, buffer_.length);
18717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer_ = newBuffer;
18727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_] = new PCEI();
18747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].ce_ = ce;
18757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].low_ = ixLow;
18767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].high_ = ixHigh;
18777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bufferIndex_ += 1;
18797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            PCEI get() {
18827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (bufferIndex_ > 0) {
18837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return buffer_[--bufferIndex_];
18847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
18867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Raw collation element buffer stuff ported from ICU4C ucoleitr.cpp */
18907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final class RCEI {
18917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int ce_;
18927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int low_;
18937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int high_;
18947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final class RCEBuffer {
18977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private RCEI[] buffer_ = new RCEI[DEFAULT_BUFFER_SIZE];
18987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private int bufferIndex_ = 0;
18997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean empty() {
19017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return bufferIndex_ <= 0;
19027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            void put(int ce, int ixLow, int ixHigh) {
19057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (bufferIndex_ >= buffer_.length) {
19067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    RCEI[] newBuffer = new RCEI[buffer_.length + BUFFER_GROW];
19077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.arraycopy(buffer_, 0, newBuffer, 0, buffer_.length);
19087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer_ = newBuffer;
19097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
19107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_] = new RCEI();
19117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].ce_ = ce;
19127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].low_ = ixLow;
19137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer_[bufferIndex_].high_ = ixHigh;
19147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bufferIndex_ += 1;
19167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            RCEI get() {
19197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (bufferIndex_ > 0) {
19207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return buffer_[--bufferIndex_];
19217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
19227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
19237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
19257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
19267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
19287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java port of ICU4C CEI (usearch.cpp)
19297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * CEI  Collation Element + source text index.
19317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *      These structs are kept in the circular buffer.
19327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
19337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class CEI {
19347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long ce_;
19357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lowIndex_;
19367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int highIndex_;
19377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
19387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
19407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * CEBuffer A circular buffer of CEs from the text being searched
19417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
19427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class CEBuffer {
19437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Java porting note: ICU4C uses the size for stack buffer
19447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // static final int DEFAULT_CEBUFFER_SIZE = 96;
19457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static final int CEBUFFER_EXTRA = 32;
19477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static final int MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L = 8;
19487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static final int MAX_TARGET_IGNORABLES_PER_PAT_OTHER = 3;
19497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI[] buf_;
19517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int bufSize_;
19527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int firstIx_;
19537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limitIx_;
19547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Java porting note: No references in ICU4C implementation
19567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // CollationElementIterator ceIter_;
19577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringSearch strSearch_;
19597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEBuffer(StringSearch ss) {
19617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strSearch_ = ss;
19627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            bufSize_ = ss.pattern_.PCELength_ + CEBUFFER_EXTRA;
19637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (ss.search_.elementComparisonType_ != ElementComparisonType.STANDARD_ELEMENT_COMPARISON) {
19647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String patText = ss.pattern_.text_;
19657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (patText != null) {
19667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    for (int i = 0; i < patText.length(); i++) {
19677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        char c = patText.charAt(i);
19687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (MIGHT_BE_JAMO_L(c)) {
19697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            bufSize_ += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L;
19707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else {
19717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // No check for surrogates, we might allocate slightly more buffer than necessary.
19727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            bufSize_ += MAX_TARGET_IGNORABLES_PER_PAT_OTHER;
19737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
19747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
19757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
19767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Not used - see above
19797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // ceIter_ = ss.textIter_;
19807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            firstIx_ = 0;
19827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limitIx_ = 0;
19837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!ss.initTextProcessedIter()) {
19857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
19867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_ = new CEI[bufSize_];
19897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
19907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Get the CE with the specified index.
19927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   Index must be in the range
19937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //             n-history_size < index < n+1
19947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   where n is the largest index to have been fetched by some previous call to this function.
19957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
19967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
19977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI get(int index) {
19987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = index % bufSize_;
19997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index >= firstIx_ && index < limitIx_) {
20017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The request was for an entry already in our buffer.
20027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Just return it.
20037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return buf_[i];
20047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Caller is requesting a new, never accessed before, CE.
20077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Verify that it is the next one in sequence, which is all
20087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // that is allowed.
20097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index != limitIx_) {
20107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert(false);
20117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
20127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Manage the circular CE buffer indexing
20157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limitIx_++;
20167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (limitIx_ - firstIx_ >= bufSize_) {
20187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The buffer is full, knock out the lowest-indexed entry.
20197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                firstIx_++;
20207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationPCE.Range range = new CollationPCE.Range();
20237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (buf_[i] == null) {
20247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buf_[i] = new CEI();
20257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].ce_ = strSearch_.textProcessedIter_.nextProcessed(range);
20277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].lowIndex_ = range.ixLow_;
20287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].highIndex_ = range.ixHigh_;
20297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return buf_[i];
20317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Get the CE with the specified index.
20347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   Index must be in the range
20357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //             n-history_size < index < n+1
20367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   where n is the largest index to have been fetched by some previous call to this function.
20377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
20387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
20397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEI getPrevious(int index) {
20407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = index % bufSize_;
20417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index >= firstIx_ && index < limitIx_) {
20437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The request was for an entry already in our buffer.
20447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Just return it.
20457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return buf_[i];
20467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Caller is requesting a new, never accessed before, CE.
20497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Verify that it is the next one in sequence, which is all
20507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // that is allowed.
20517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index != limitIx_) {
20527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert(false);
20537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
20547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Manage the circular CE buffer indexing
20577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limitIx_++;
20587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (limitIx_ - firstIx_ >= bufSize_) {
20607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The buffer is full, knock out the lowest-indexed entry.
20617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                firstIx_++;
20627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationPCE.Range range = new CollationPCE.Range();
20657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (buf_[i] == null) {
20667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buf_[i] = new CEI();
20677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].ce_ = strSearch_.textProcessedIter_.previousProcessed(range);
20697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].lowIndex_ = range.ixLow_;
20707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buf_[i].highIndex_ = range.ixHigh_;
20717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return buf_[i];
20737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static boolean MIGHT_BE_JAMO_L(char c) {
20767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (c >= 0x1100 && c <= 0x115E)
20777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    || (c >= 0x3131 && c <= 0x314E)
20787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    || (c >= 0x3165 && c <= 0x3186);
20797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
20817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
2082