12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2009-2014, International Business Machines Corporation and 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.DataOutputStream; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.OutputStream; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.nio.ByteBuffer; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author aheninger 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A read-only Trie2, holding 32 bit data values. 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A Trie2 is a highly optimized data structure for mapping from Unicode 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * See class Trie2 for descriptions of the API for accessing the contents of a trie. 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The fundamental data access methods are declared final in this class, with 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the intent that applications might gain a little extra performance, when compared 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * with calling the same methods via the abstract UTrie2 base class. 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class Trie2_32 extends Trie2 { 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Internal constructor, not for general use. 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Trie2_32() { 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The serialized format is identical between ICU4C and ICU4J, so this function 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * will work with serialized Trie2s from either. 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The serialized Trie2 in the bytes may be in either little or big endian byte order. 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This allows using serialized Tries from ICU4C without needing to consider the 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * byte order of the system that created them. 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param bytes a byte buffer to the serialized form of a UTrie2. 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return An unserialized Trie_32, ready for use. 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws IOException if a read error occurs in the buffer. 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws ClassCastException if the bytes contains a serialized Trie2_16 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException { 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (Trie2_32) Trie2.createFromSerialized(bytes); 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Get the value for a code point as stored in the Trie2. 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param codePoint the code point 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the value 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final int get(int codePoint) { 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value; 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int ix; 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (codePoint >= 0) { 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Ordinary BMP code point, excluding leading surrogates. 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 32 bit data is stored in the index array itself. 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = index[codePoint >> UTRIE2_SHIFT_2]; 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = data32[ix]; 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return value; 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (codePoint <= 0xffff) { 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Lead Surrogate Code Point. A Separate index section is stored for 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // lead surrogate code units and code points. 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The main index has the code unit data. 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // For this function, we need the code point data. 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Note: this expression could be refactored for slightly improved efficiency, but 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // surrogate code points will be so rare in practice that it's not worth it. 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = data32[ix]; 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return value; 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (codePoint < highStart) { 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Supplemental code point, use two-level lookup. 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = index[ix]; 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = index[ix]; 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = data32[ix]; 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return value; 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (codePoint <= 0x10ffff) { 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = data32[highValueIndex]; 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return value; 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Fall through. The code point is outside of the legal range of 0..0x10ffff. 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return errorValue; 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Get a Trie2 value for a UTF-16 code unit. 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This function returns the same value as get() if the input 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * character is outside of the lead surrogate range 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * There are two values stored in a Trie2 for inputs in the lead 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * surrogate range. This function returns the alternate value, 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * while Trie2.get() returns the main value. 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param codeUnit a 16 bit code unit or lead surrogate value. 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the value 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getFromU16SingleLead(char codeUnit){ 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value; 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int ix; 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = index[codeUnit >> UTRIE2_SHIFT_2]; 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = data32[ix]; 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return value; 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Serialize a Trie2_32 onto an OutputStream. 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A Trie2 can be serialized multiple times. 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The serialized data is compatible with ICU4C UTrie2 serialization. 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Trie2 serialization is unrelated to Java object serialization. 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param os the stream to which the serialized Trie2 data will be written. 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the number of bytes written. 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throw IOException on an error writing to the OutputStream. 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int serialize(OutputStream os) throws IOException { 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert DataOutputStream dos = new DataOutputStream(os); 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int bytesWritten = 0; 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytesWritten += serializeHeader(dos); 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i=0; i<dataLength; i++) { 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dos.writeInt(data32[i]); 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytesWritten += dataLength*4; 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return bytesWritten; 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the number of bytes of the serialized trie 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getSerializedLength() { 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 16+header.indexLength*2+dataLength*4; 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Given a starting code point, find the last in a range of code points, 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * all with the same value. 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This function is part of the implementation of iterating over the 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Trie2's contents. 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param startingCP The code point at which to begin looking. 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The last code point with the same value as the starting code point. 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rangeEnd(int startingCP, int limit, int value) { 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int cp = startingCP; 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int block = 0; 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int index2Block = 0; 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Loop runs once for each of 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // - a partial data block 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // - a reference to the null (default) data block. 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // - a reference to the index2 null block 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert outerLoop: 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (cp >= limit) { 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Ordinary BMP code point, excluding leading surrogates. 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 16 bit data is stored in the index array itself. 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index2Block = 0; 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (cp < 0xffff) { 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (cp < highStart) { 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Supplemental code point, use two-level lookup. 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index2Block = index[ix]; 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Code point above highStart. 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (value == data32[highValueIndex]) { 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cp = limit; 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (index2Block == index2NullOffset) { 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (value != initialValue) { 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cp += UTRIE2_CP_PER_INDEX_1_ENTRY; 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (block == dataNullOffset) { 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The block at dataNullOffset has all values == initialValue. 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Because Trie2 iteration always proceeds in ascending order, we will always 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // encounter a null block at its beginning, and can skip over 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // a number of code points equal to the length of the block. 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (value != initialValue) { 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cp += UTRIE2_DATA_BLOCK_LENGTH; 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Current position refers to an ordinary data block. 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Walk over the data entries, checking the values. 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int startIx = block + (cp & UTRIE2_DATA_MASK); 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int ix = startIx; ix<limitIx; ix++) { 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (data32[ix] != value) { 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We came to an entry with a different value. 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We are done. 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cp += (ix - startIx); 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break outerLoop; 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The ordinary data block contained our value until its end. 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Advance the current code point, and continue the outer loop. 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cp += limitIx - startIx; 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (cp > limit) { 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cp = limit; 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return cp - 1; 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 258