/**
*******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashMap;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.CharacterIteratorWrapper;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.Collation;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.CollationData;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.CollationIterator;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.ContractionsAndExpansions;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.FCDIterCollationIterator;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.IterCollationIterator;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.UTF16CollationIterator;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.coll.UVector32;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * <p><code>CollationElementIterator</code> is an iterator created by
 * a RuleBasedCollator to walk through a string. The return result of
 * each iteration is a 32-bit collation element (CE) that defines the
 * ordering priority of the next character or sequence of characters
 * in the source string.</p>
 *
 * <p>For illustration, consider the following in Slovak and in traditional Spanish collation:</p>
 * <blockquote>
 * <pre>
 * "ca" -&gt; the first collation element is CE('c') and the second
 *         collation element is CE('a').
 * "cha" -&gt; the first collation element is CE('ch') and the second
 *          collation element is CE('a').
 * </pre>
 * </blockquote>
 * <p>And in German phonebook collation,</p>
 * <blockquote>
 * <pre>
 * Since the character '&#230;' is a composed character of 'a' and 'e', the
 * iterator returns two collation elements for the single character '&#230;'
 *
 * "&#230;b" -&gt; the first collation element is collation_element('a'), the
 *              second collation element is collation_element('e'), and the
 *              third collation element is collation_element('b').
 * </pre>
 * </blockquote>
 *
 * <p>For collation ordering comparison, the collation element results
 * cannot be compared simply by using basic arithmetic operators,
 * e.g. &lt;, == or &gt;; further processing has to be done. Details
 * can be found in the ICU
 * <a href="http://userguide.icu-project.org/collation/architecture">
 * User Guide</a>. An example of using the CollationElementIterator
 * for collation ordering comparison is the class
 * {@link com.ibm.icu.text.StringSearch}.</p>
 *
 * <p>To construct a CollationElementIterator object, users
 * call the method getCollationElementIterator() on a
 * RuleBasedCollator that defines the desired sorting order.</p>
 *
 * <p>Example:</p>
 * <blockquote>
 * <pre>
 *  String testString = "This is a test";
 *  RuleBasedCollator rbc = new RuleBasedCollator("&amp;a&lt;b");
 *  CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
 *  int order;
 *  while ((order = iterator.next()) != CollationElementIterator.NULLORDER) {
 *      if (order != CollationElementIterator.IGNORABLE) {
 *          // order is valid, not ignorable and we have not passed the end
 *          // of the iteration, we do something
 *          int primaryOrder = CollationElementIterator.primaryOrder(order);
 *          System.out.println("Next primary order 0x" +
 *                             Integer.toHexString(primaryOrder));
 *      }
 *  }
 * </pre>
 * </blockquote>
 *
 * <p>
 * The method next() returns the collation order of the next character based on
 * the comparison level of the collator. The method previous() returns the
 * collation order of the previous character based on the comparison level of
 * the collator. The Collation Element Iterator moves only in one direction
 * between calls to reset(), setOffset(), or setText(). That is, calls to next()
 * and previous() cannot be interleaved. Whenever previous() is to be called after
 * next() or vice versa, reset(), setOffset() or setText() has to be called first
 * to reset the status, shifting current position to either the end or the start of
 * the string (reset() or setText()), or the specified position (setOffset()).
 * Hence at the next call of next() or previous(), the first or last collation order,
 * or collation order at the specified position will be returned. If a change of
 * direction is done without one of these calls, the result is undefined.
 * </p>
 * <p>
 * This class is not subclassable.
 * </p>
 * @see Collator
 * @see RuleBasedCollator
 * @see StringSearch
 * @author Syn Wee Quek
 * @stable ICU 2.8
 */
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class CollationElementIterator
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert{
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationIterator iter_;  // owned
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private RuleBasedCollator rbc_;  // aliased
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int otherHalf_;
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <0: backwards; 0: just after reset() (previous() begins from end);
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * 1: just after setOffset(); >1: forward
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private byte dir_;
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Stores offsets from expansions and from unsafe-backwards iteration,
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * so that getOffset() returns intermediate offsets for the CEs
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that are consistent with forward iteration.
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UVector32 offsets_;
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String string_;  // TODO: needed in Java? if so, then add a UCharacterIterator field too?
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>This constant is returned by the iterator in the methods
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * next() and previous() when the end or the beginning of the
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * source string has been reached, and there are no more valid
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * collation elements to return.</p>
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>See class documentation for an example of use.</p>
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #next
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #previous */
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final static int NULLORDER = 0xffffffff;
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>This constant is returned by the iterator in the methods
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * next() and previous() when a collation element result is to be
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ignored.</p>
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>See class documentation for an example of use.</p>
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #next
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #previous */
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int IGNORABLE = 0;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the primary order of the specified collation element,
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * i.e. the first 16 bits.  This value is unsigned.
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param ce the collation element
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the element's 16 bits primary order.
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final static int primaryOrder(int ce) {
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (ce >>> 16) & 0xffff;
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the secondary order of the specified collation element,
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * i.e. the 16th to 23th bits, inclusive.  This value is unsigned.
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param ce the collation element
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the element's 8 bits secondary order
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final static int secondaryOrder(int ce) {
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (ce >>> 8) & 0xff;
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the tertiary order of the specified collation element, i.e. the last
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * 8 bits.  This value is unsigned.
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param ce the collation element
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the element's 8 bits tertiary order
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final static int tertiaryOrder(int ce) {
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce & 0xff;
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int getFirstHalf(long p, int lower32) {
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((int)p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff);
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int getSecondHalf(long p, int lower32) {
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((int)p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean ceNeedsTwoParts(long ce) {
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (ce & 0xffff00ff003fL) != 0;
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Creates an iterator bound to the given collator with no text attached yet.
 * The other constructors delegate to this one and then call setText().
 *
 * @param collator the RuleBasedCollator to associate with this iterator
 */
private CollationElementIterator(RuleBasedCollator collator) {
    iter_ = null;
    rbc_ = collator;
    otherHalf_ = 0;
    dir_ = 0;
    offsets_ = null;
}
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * <p>CollationElementIterator constructor. This takes a source
 * string and a RuleBasedCollator. The iterator will walk through
 * the source string based on the rules defined by the
 * collator. If the source string is empty, NULLORDER will be
 * returned on the first call to next().</p>
 *
 * @param source the source string.
 * @param collator the RuleBasedCollator
 * @stable ICU 2.8
 */
CollationElementIterator(String source, RuleBasedCollator collator) {
    this(collator);
    setText(source);
}
// Note: The constructors should take settings & tailoring, not a collator,
// to avoid circular dependencies.
// However, for equals() we would need to be able to compare tailoring data for equality
// without making CollationData or CollationTailoring depend on TailoredSet.
// (See the implementation of RuleBasedCollator.equals().)
// That might require creating an intermediate class that would be used
// by both CollationElementIterator and RuleBasedCollator
// but only contain the part of RBC.equals() related to data and rules.
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * <p>CollationElementIterator constructor. This takes a source
 * character iterator and a RuleBasedCollator. The iterator will
 * walk through the source string based on the rules defined by
 * the collator. If the source string is empty, NULLORDER will be
 * returned on the first call to next().</p>
 *
 * @param source the source string iterator.
 * @param collator the RuleBasedCollator
 * @stable ICU 2.8
 */
CollationElementIterator(CharacterIterator source, RuleBasedCollator collator) {
    this(collator);
    setText(source);
}
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * <p>CollationElementIterator constructor. This takes a source
 * UCharacterIterator and a RuleBasedCollator. The iterator will
 * walk through the source string based on the rules defined by
 * the collator. If the source string is empty, NULLORDER will be
 * returned on the first call to next().</p>
 *
 * @param source the source string iterator.
 * @param collator the RuleBasedCollator
 * @stable ICU 2.8
 */
CollationElementIterator(UCharacterIterator source, RuleBasedCollator collator) {
    this(collator);
    setText(source);
}
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Returns the character offset in the source string
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * corresponding to the next collation element. I.e., getOffset()
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * returns the position in the source string corresponding to the
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * collation element that will be returned by the next call to
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * next() or previous(). This value could be any of:
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <ul>
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <li> The index of the <b>first</b> character corresponding to
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the next collation element. (This means that if
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <code>setOffset(offset)</code> sets the index in the middle of
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * a contraction, <code>getOffset()</code> returns the index of
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the first character in the contraction, which may not be equal
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to the original offset that was set. Hence calling getOffset()
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * immediately after setOffset(offset) does not guarantee that the
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * original offset set will be returned.)
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <li> If normalization is on, the index of the <b>immediate</b>
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * subsequent character, or composite character with the first
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * character, having a combining class of 0.
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <li> The length of the source string, if iteration has reached
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the end.
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *</ul>
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </p>
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The character offset in the source string corresponding to the
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         collation element that will be returned by the next call to
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         next() or previous().
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getOffset() {
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (dir_ < 0 && offsets_ != null && !offsets_.isEmpty()) {
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // CollationIterator.previousCE() decrements the CEs length
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // while it pops CEs from its internal buffer.
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = iter_.getCEsLength();
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (otherHalf_ != 0) {
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Return the trailing CE offset while we are in the middle of a 64-bit CE.
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++i;
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (i < offsets_.size());
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return offsets_.elementAti(i);
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return iter_.getOffset();
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Get the next collation element in the source string.</p>
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>This iterator iterates over a sequence of collation elements
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that were built from the string. Because there isn't
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * necessarily a one-to-one mapping from characters to collation
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * elements, this doesn't mean the same thing as "return the
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * collation element [or ordering priority] of the next character
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in the string".</p>
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>This function returns the collation element that the
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iterator is currently pointing to, and then updates the
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * internal pointer to point to the next element.</p>
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the next collation element or NULLORDER if the end of the
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         iteration has been reached.
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int next() {
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (dir_ > 1) {
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Continue forward iteration. Test this first.
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (otherHalf_ != 0) {
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int oh = otherHalf_;
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                otherHalf_ = 0;
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return oh;
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (dir_ == 1) {
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // next() after setOffset()
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dir_ = 2;
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (dir_ == 0) {
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // The iter_ is already reset to the start of the text.
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dir_ = 2;
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else /* dir_ < 0 */{
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // illegal change of direction
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalStateException("Illegal change of direction");
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // No need to keep all CEs in the buffer when we iterate.
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter_.clearCEsIfNoneRemaining();
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long ce = iter_.nextCE();
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (ce == Collation.NO_CE) {
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return NULLORDER;
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long p = ce >>> 32;
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lower32 = (int) ce;
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int firstHalf = getFirstHalf(p, lower32);
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int secondHalf = getSecondHalf(p, lower32);
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (secondHalf != 0) {
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            otherHalf_ = secondHalf | 0xc0; // continuation CE
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return firstHalf;
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Get the previous collation element in the source string.</p>
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>This iterator iterates over a sequence of collation elements
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that were built from the string. Because there isn't
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * necessarily a one-to-one mapping from characters to collation
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * elements, this doesn't mean the same thing as "return the
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * collation element [or ordering priority] of the previous
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * character in the string".</p>
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>This function updates the iterator's internal pointer to
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * point to the collation element preceding the one it's currently
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * pointing to and then returns that element, while next() returns
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the current element and then updates the pointer.</p>
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the previous collation element, or NULLORDER when the start of
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             the iteration has been reached.
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int previous() {
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (dir_ < 0) {
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Continue backwards iteration. Test this first.
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (otherHalf_ != 0) {
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int oh = otherHalf_;
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                otherHalf_ = 0;
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return oh;
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (dir_ == 0) {
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            iter_.resetToOffset(string_.length());
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dir_ = -1;
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (dir_ == 1) {
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // previous() after setOffset()
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dir_ = -1;
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else /* dir_ > 1 */{
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // illegal change of direction
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalStateException("Illegal change of direction");
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offsets_ == null) {
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offsets_ = new UVector32();
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If we already have expansion CEs, then we also have offsets.
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Otherwise remember the trailing offset in case we need to
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // write offsets for an artificial expansion.
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limitOffset = iter_.getCEsLength() == 0 ? iter_.getOffset() : 0;
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long ce = iter_.previousCE(offsets_);
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (ce == Collation.NO_CE) {
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return NULLORDER;
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long p = ce >>> 32;
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lower32 = (int) ce;
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int firstHalf = getFirstHalf(p, lower32);
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int secondHalf = getSecondHalf(p, lower32);
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (secondHalf != 0) {
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (offsets_.isEmpty()) {
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // When we convert a single 64-bit CE into two 32-bit CEs,
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // we need to make this artificial expansion behave like a normal expansion.
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // See CollationIterator.previousCE().
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offsets_.addElement(iter_.getOffset());
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offsets_.addElement(limitOffset);
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            otherHalf_ = firstHalf;
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return secondHalf | 0xc0; // continuation CE
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return firstHalf;
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p> Resets the cursor to the beginning of the string. The next
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * call to next() or previous() will return the first and last
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * collation element in the string, respectively.</p>
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>If the RuleBasedCollator used by this iterator has had its
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * attributes changed, calling reset() will reinitialize the
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iterator to use the new attributes.</p>
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void reset() {
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter_ .resetToOffset(0);
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        otherHalf_ = 0;
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dir_ = 0;
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p> Sets the iterator to point to the collation element
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * corresponding to the character at the specified offset. The
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * value returned by the next call to next() will be the collation
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * element corresponding to the characters at offset.</p>
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>If offset is in the middle of a contracting character
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * sequence, the iterator is adjusted to the start of the
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * contracting sequence. This means that getOffset() is not
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * guaranteed to return the same value set by this method.</p>
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>If the decomposition mode is on, and offset is in the middle
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of a decomposible range of source text, the iterator may not
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * return a correct result for the next forwards or backwards
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iteration.  The user must ensure that the offset is not in the
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * middle of a decomposible range.</p>
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newOffset the character offset into the original source string to
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *        set. Note that this is not an offset into the corresponding
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *        sequence of collation elements.
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setOffset(int newOffset) {
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (0 < newOffset && newOffset < string_.length()) {
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int offset = newOffset;
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            do {
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char c = string_.charAt(offset);
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (!rbc_.isUnsafe(c) ||
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        (Character.isHighSurrogate(c) && !rbc_.isUnsafe(string_.codePointAt(offset)))) {
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Back up to before this unsafe character.
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                --offset;
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } while (offset > 0);
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (offset < newOffset) {
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // We might have backed up more than necessary.
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // but for text "chu" setOffset(2) should remain at 2
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // although we initially back up to offset 0.
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Find the last safe offset no greater than newOffset by iterating forward.
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int lastSafeOffset = offset;
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    iter_.resetToOffset(lastSafeOffset);
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    do {
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        iter_.nextCE();
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } while ((offset = iter_.getOffset()) == lastSafeOffset);
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (offset <= newOffset) {
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        lastSafeOffset = offset;
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } while (offset < newOffset);
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newOffset = lastSafeOffset;
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter_.resetToOffset(newOffset);
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        otherHalf_ = 0;
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dir_ = 1;
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Set a new source string for iteration, and reset the offset
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to the beginning of the text.</p>
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source the new source string for iteration.
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(String source) {
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        string_ = source; // TODO: do we need to remember the source string in a field?
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationIterator newIter;
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean numeric = rbc_.settings.readOnly().isNumeric();
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (rbc_.settings.readOnly().dontCheckFCD()) {
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newIter = new UTF16CollationIterator(rbc_.data, numeric, string_, 0);
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newIter = new FCDUTF16CollationIterator(rbc_.data, numeric, string_, 0);
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter_ = newIter;
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        otherHalf_ = 0;
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dir_ = 0;
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Set a new source string iterator for iteration, and reset the
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * offset to the beginning of the text.
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </p>
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>The source iterator's integrity will be preserved since a new copy
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * will be created for use.</p>
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source the new source string iterator for iteration.
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(UCharacterIterator source) {
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        string_ = source.getText(); // TODO: do we need to remember the source string in a field?
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: In C++, we just setText(source.getText()).
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In Java, we actually operate on a character iterator.
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // (The old code apparently did so only for a CharacterIterator;
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // for a UCharacterIterator it also just used source.getText()).
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: do we need to remember the cloned iterator in a field?
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator src;
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            src = (UCharacterIterator) source.clone();
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (CloneNotSupportedException e) {
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fall back to ICU 52 behavior of iterating over the text contents
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // of the UCharacterIterator.
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setText(source.getText());
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        src.setToStart();
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationIterator newIter;
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean numeric = rbc_.settings.readOnly().isNumeric();
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (rbc_.settings.readOnly().dontCheckFCD()) {
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newIter = new IterCollationIterator(rbc_.data, numeric, src);
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter_ = newIter;
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        otherHalf_ = 0;
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dir_ = 0;
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Set a new source string iterator for iteration, and reset the
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * offset to the beginning of the text.
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </p>
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source the new source string iterator for iteration.
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(CharacterIterator source) {
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: In C++, we just setText(source.getText()).
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In Java, we actually operate on a character iterator.
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: do we need to remember the iterator in a field?
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: apparently we don't clone a CharacterIterator in Java,
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we only clone the text for a UCharacterIterator?? see the old code in the constructors
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator src = new CharacterIteratorWrapper(source);
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        src.setToStart();
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        string_ = src.getText(); // TODO: do we need to remember the source string in a field?
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationIterator newIter;
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean numeric = rbc_.settings.readOnly().isNumeric();
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (rbc_.settings.readOnly().dontCheckFCD()) {
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newIter = new IterCollationIterator(rbc_.data, numeric, src);
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter_ = newIter;
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        otherHalf_ = 0;
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dir_ = 0;
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note: This method is @stable ICU 2.0 in ICU4C, but not available
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // in ICU4J. For now, keep it package local.
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Gets the comparison order in the desired strength. Ignore the other
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * differences.
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @param order The order value
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int strengthOrder(int order) {
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int s = rbc_.settings.readOnly().getStrength();
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Mask off the unwanted differences.
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (s == Collator.PRIMARY) {
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            order &= 0xffff0000;
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        else if (s == Collator.SECONDARY) {
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            order &= 0xffffff00;
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return order;
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class MaxExpSink implements ContractionsAndExpansions.CESink {
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        MaxExpSink(Map<Integer, Integer> h) {
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            maxExpansions = h;
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Java 6: @Override
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void handleCE(long ce) {
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Java 6: @Override
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void handleExpansion(long ces[], int start, int length) {
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (length <= 1) {
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // We do not need to add single CEs into the map.
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int count = 0; // number of CE "halves"
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < length; ++i) {
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                count += ceNeedsTwoParts(ces[start + i]) ? 2 : 1;
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // last "half" of the last CE
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long ce = ces[start + length - 1];
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long p = ce >>> 32;
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int lower32 = (int) ce;
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int lastHalf = getSecondHalf(p, lower32);
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (lastHalf == 0) {
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastHalf = getFirstHalf(p, lower32);
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (lastHalf != 0);
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastHalf |= 0xc0; // old-style continuation CE
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Integer oldCount = maxExpansions.get(lastHalf);
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (oldCount == null || count > oldCount) {
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                maxExpansions.put(lastHalf, count);
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private Map<Integer, Integer> maxExpansions;
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final Map<Integer, Integer> computeMaxExpansions(CollationData data) {
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Map<Integer, Integer> maxExpansions = new HashMap<Integer, Integer>();
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        MaxExpSink sink = new MaxExpSink(maxExpansions);
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        new ContractionsAndExpansions(null, null, sink, true).forData(data);
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return maxExpansions;
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p> Returns the maximum length of any expansion sequence that ends with
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the specified collation element. If there is no expansion with this
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * collation element as the last element, returns 1.
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </p>
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param ce a collation element returned by previous() or next().
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the maximum length of any expansion sequence ending
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         with the specified collation element.
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getMaxExpansion(int ce) {
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getMaxExpansion(rbc_.tailoring.maxExpansions, ce);
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int getMaxExpansion(Map<Integer, Integer> maxExpansions, int order) {
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (order == 0) {
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 1;
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Integer max;
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (maxExpansions != null && (max = maxExpansions.get(order)) != null) {
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return max;
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((order & 0xc0) == 0xc0) {
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // old-style continuation CE
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 2;
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 1;
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private byte normalizeDir() {
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dir_ == 1 ? 0 : dir_;
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tests that argument object is equals to this CollationElementIterator.
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Iterators are equal if the objects uses the same RuleBasedCollator,
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the same source text and have the same current position in iteration.
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param that object to test if it is equals to this
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             CollationElementIterator
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean equals(Object that) {
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (that == this) {
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (that instanceof CollationElementIterator) {
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationElementIterator thatceiter = (CollationElementIterator) that;
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return rbc_.equals(thatceiter.rbc_)
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    && otherHalf_ == thatceiter.otherHalf_
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    && normalizeDir() == thatceiter.normalizeDir()
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    && string_.equals(thatceiter.string_)
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    && iter_.equals(thatceiter.iter_);
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false;
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Mock implementation of hashCode(). This implementation always returns a constant
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * value. When Java assertion is enabled, this method triggers an assertion failure.
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int hashCode() {
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert false : "hashCode not designed";
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 42;
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public RuleBasedCollator getRuleBasedCollator() {
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return rbc_;
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
735