12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others.
22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2009-2014, International Business Machines Corporation and
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.DataOutputStream;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.OutputStream;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.nio.ByteBuffer;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author aheninger
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A read-only Trie2, holding 32 bit data values.
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A Trie2 is a highly optimized data structure for mapping from Unicode
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value.
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * See class Trie2 for descriptions of the API for accessing the contents of a trie.
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The fundamental data access methods are declared final in this class, with
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the intent that applications might gain a little extra performance, when compared
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * with calling the same methods via the abstract UTrie2 base class.
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class Trie2_32 extends Trie2 {
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Internal constructor, not for general use.
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    Trie2_32() {
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Create a Trie2 from its serialized form.  Inverse of utrie2_serialize().
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The serialized format is identical between ICU4C and ICU4J, so this function
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * will work with serialized Trie2s from either.
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The serialized Trie2 in the bytes may be in either little or big endian byte order.
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This allows using serialized Tries from ICU4C without needing to consider the
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * byte order of the system that created them.
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param bytes a byte buffer to the serialized form of a UTrie2.
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return An unserialized Trie_32, ready for use.
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException if the stream does not contain a serialized Trie2.
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IOException if a read error occurs in the buffer.
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws ClassCastException if the bytes contains a serialized Trie2_16
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException {
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (Trie2_32) Trie2.createFromSerialized(bytes);
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Get the value for a code point as stored in the Trie2.
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param codePoint the code point
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the value
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int get(int codePoint) {
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int value;
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ix;
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (codePoint >= 0) {
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Ordinary BMP code point, excluding leading surrogates.
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // 32 bit data is stored in the index array itself.
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix = index[codePoint >> UTRIE2_SHIFT_2];
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                value = data32[ix];
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return value;
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (codePoint <= 0xffff) {
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Lead Surrogate Code Point.  A Separate index section is stored for
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // lead surrogate code units and code points.
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //   The main index has the code unit data.
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //   For this function, we need the code point data.
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Note: this expression could be refactored for slightly improved efficiency, but
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //       surrogate code points will be so rare in practice that it's not worth it.
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                value = data32[ix];
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return value;
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (codePoint < highStart) {
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Supplemental code point, use two-level lookup.
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1);
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix = index[ix];
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix = index[ix];
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                value = data32[ix];
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return value;
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (codePoint <= 0x10ffff) {
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                value = data32[highValueIndex];
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return value;
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Fall through.  The code point is outside of the legal range of 0..0x10ffff.
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return errorValue;
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Get a Trie2 value for a UTF-16 code unit.
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function returns the same value as get() if the input
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * character is outside of the lead surrogate range
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * There are two values stored in a Trie2 for inputs in the lead
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * surrogate range.  This function returns the alternate value,
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * while Trie2.get() returns the main value.
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param codeUnit a 16 bit code unit or lead surrogate value.
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the value
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getFromU16SingleLead(char codeUnit){
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int value;
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ix;
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ix = index[codeUnit >> UTRIE2_SHIFT_2];
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK);
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        value = data32[ix];
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return value;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Serialize a Trie2_32 onto an OutputStream.
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A Trie2 can be serialized multiple times.
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The serialized data is compatible with ICU4C UTrie2 serialization.
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Trie2 serialization is unrelated to Java object serialization.
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param os the stream to which the serialized Trie2 data will be written.
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the number of bytes written.
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throw IOException on an error writing to the OutputStream.
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int serialize(OutputStream os) throws IOException {
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        DataOutputStream dos = new DataOutputStream(os);
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int  bytesWritten = 0;
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        bytesWritten += serializeHeader(dos);
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i=0; i<dataLength; i++) {
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dos.writeInt(data32[i]);
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        bytesWritten += dataLength*4;
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return bytesWritten;
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the number of bytes of the serialized trie
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getSerializedLength() {
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 16+header.indexLength*2+dataLength*4;
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Given a starting code point, find the last in a range of code points,
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * all with the same value.
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function is part of the implementation of iterating over the
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Trie2's contents.
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param startingCP The code point at which to begin looking.
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The last code point with the same value as the starting code point.
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int rangeEnd(int startingCP, int limit, int value) {
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int   cp = startingCP;
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int   block = 0;
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int   index2Block = 0;
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Loop runs once for each of
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   - a partial data block
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   - a reference to the null (default) data block.
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   - a reference to the index2 null block
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert      outerLoop:
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (;;) {
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cp >= limit) {
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) {
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Ordinary BMP code point, excluding leading surrogates.
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // 16 bit data is stored in the index array itself.
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index2Block = 0;
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                block       = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT;
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (cp < 0xffff) {
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index2Block = UTRIE2_LSCP_INDEX_2_OFFSET;
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                block       = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT;
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (cp < highStart) {
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Supplemental code point, use two-level lookup.
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1);
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index2Block = index[ix];
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT;
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else  {
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Code point above highStart.
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (value == data32[highValueIndex]) {
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    cp = limit;
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index2Block == index2NullOffset) {
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (value != initialValue) {
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cp += UTRIE2_CP_PER_INDEX_1_ENTRY;
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (block == dataNullOffset) {
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The block at dataNullOffset has all values == initialValue.
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Because Trie2 iteration always proceeds in ascending order, we will always
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //   encounter a null block at its beginning, and can skip over
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //   a number of code points equal to the length of the block.
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (value != initialValue) {
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cp += UTRIE2_DATA_BLOCK_LENGTH;
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Current position refers to an ordinary data block.
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Walk over the data entries, checking the values.
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int startIx = block + (cp & UTRIE2_DATA_MASK);
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH;
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int ix = startIx; ix<limitIx; ix++) {
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (data32[ix] != value) {
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // We came to an entry with a different value.
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        //   We are done.
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        cp += (ix - startIx);
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break outerLoop;
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The ordinary data block contained our value until its end.
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  Advance the current code point, and continue the outer loop.
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cp += limitIx - startIx;
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (cp > limit) {
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cp = limit;
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return cp - 1;
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
258