1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/* 3 ******************************************************************************* 4 * Copyright (C) 2009-2014, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************************* 7 */ 8 9package android.icu.impl; 10 11import java.io.DataOutputStream; 12import java.io.IOException; 13import java.io.OutputStream; 14import java.nio.ByteBuffer; 15 16/** 17 * @author aheninger 18 * 19 * A read-only Trie2, holding 32 bit data values. 20 * 21 * A Trie2 is a highly optimized data structure for mapping from Unicode 22 * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. 23 * 24 * See class Trie2 for descriptions of the API for accessing the contents of a trie. 25 * 26 * The fundamental data access methods are declared final in this class, with 27 * the intent that applications might gain a little extra performance, when compared 28 * with calling the same methods via the abstract UTrie2 base class. 29 * @hide Only a subset of ICU is exposed in Android 30 */ 31 32public class Trie2_32 extends Trie2 { 33 34 /** 35 * Internal constructor, not for general use. 36 */ 37 Trie2_32() { 38 } 39 40 41 /** 42 * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). 43 * The serialized format is identical between ICU4C and ICU4J, so this function 44 * will work with serialized Trie2s from either. 45 * 46 * The serialized Trie2 in the bytes may be in either little or big endian byte order. 47 * This allows using serialized Tries from ICU4C without needing to consider the 48 * byte order of the system that created them. 49 * 50 * @param bytes a byte buffer to the serialized form of a UTrie2. 51 * @return An unserialized Trie_32, ready for use. 52 * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. 53 * @throws IOException if a read error occurs in the buffer. 54 * @throws ClassCastException if the bytes contains a serialized Trie2_16 55 */ 56 public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException { 57 return (Trie2_32) Trie2.createFromSerialized(bytes); 58 } 59 60 /** 61 * Get the value for a code point as stored in the Trie2. 62 * 63 * @param codePoint the code point 64 * @return the value 65 */ 66 @Override 67 public final int get(int codePoint) { 68 int value; 69 int ix; 70 71 if (codePoint >= 0) { 72 if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { 73 // Ordinary BMP code point, excluding leading surrogates. 74 // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 75 // 32 bit data is stored in the index array itself. 76 ix = index[codePoint >> UTRIE2_SHIFT_2]; 77 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 78 value = data32[ix]; 79 return value; 80 } 81 if (codePoint <= 0xffff) { 82 // Lead Surrogate Code Point. A Separate index section is stored for 83 // lead surrogate code units and code points. 84 // The main index has the code unit data. 85 // For this function, we need the code point data. 86 // Note: this expression could be refactored for slightly improved efficiency, but 87 // surrogate code points will be so rare in practice that it's not worth it. 88 ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; 89 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 90 value = data32[ix]; 91 return value; 92 } 93 if (codePoint < highStart) { 94 // Supplemental code point, use two-level lookup. 95 ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); 96 ix = index[ix]; 97 ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; 98 ix = index[ix]; 99 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 100 value = data32[ix]; 101 return value; 102 } 103 if (codePoint <= 0x10ffff) { 104 value = data32[highValueIndex]; 105 return value; 106 } 107 } 108 109 // Fall through. The code point is outside of the legal range of 0..0x10ffff. 110 return errorValue; 111 } 112 113 114 /** 115 * Get a Trie2 value for a UTF-16 code unit. 116 * 117 * This function returns the same value as get() if the input 118 * character is outside of the lead surrogate range 119 * 120 * There are two values stored in a Trie2 for inputs in the lead 121 * surrogate range. This function returns the alternate value, 122 * while Trie2.get() returns the main value. 123 * 124 * @param codeUnit a 16 bit code unit or lead surrogate value. 125 * @return the value 126 */ 127 @Override 128 public int getFromU16SingleLead(char codeUnit){ 129 int value; 130 int ix; 131 132 ix = index[codeUnit >> UTRIE2_SHIFT_2]; 133 ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); 134 value = data32[ix]; 135 return value; 136 137 } 138 139 /** 140 * Serialize a Trie2_32 onto an OutputStream. 141 * 142 * A Trie2 can be serialized multiple times. 143 * The serialized data is compatible with ICU4C UTrie2 serialization. 144 * Trie2 serialization is unrelated to Java object serialization. 145 * 146 * @param os the stream to which the serialized Trie2 data will be written. 147 * @return the number of bytes written. 148 * @throw IOException on an error writing to the OutputStream. 149 */ 150 public int serialize(OutputStream os) throws IOException { 151 DataOutputStream dos = new DataOutputStream(os); 152 int bytesWritten = 0; 153 154 bytesWritten += serializeHeader(dos); 155 for (int i=0; i<dataLength; i++) { 156 dos.writeInt(data32[i]); 157 } 158 bytesWritten += dataLength*4; 159 return bytesWritten; 160 } 161 162 /** 163 * @return the number of bytes of the serialized trie 164 */ 165 public int getSerializedLength() { 166 return 16+header.indexLength*2+dataLength*4; 167 } 168 169 /** 170 * Given a starting code point, find the last in a range of code points, 171 * all with the same value. 172 * 173 * This function is part of the implementation of iterating over the 174 * Trie2's contents. 175 * @param startingCP The code point at which to begin looking. 176 * @return The last code point with the same value as the starting code point. 177 */ 178 @Override 179 int rangeEnd(int startingCP, int limit, int value) { 180 int cp = startingCP; 181 int block = 0; 182 int index2Block = 0; 183 184 // Loop runs once for each of 185 // - a partial data block 186 // - a reference to the null (default) data block. 187 // - a reference to the index2 null block 188 189 outerLoop: 190 for (;;) { 191 if (cp >= limit) { 192 break; 193 } 194 if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { 195 // Ordinary BMP code point, excluding leading surrogates. 196 // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 197 // 16 bit data is stored in the index array itself. 198 index2Block = 0; 199 block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; 200 } else if (cp < 0xffff) { 201 // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 202 index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; 203 block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; 204 } else if (cp < highStart) { 205 // Supplemental code point, use two-level lookup. 206 int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); 207 index2Block = index[ix]; 208 block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; 209 } else { 210 // Code point above highStart. 211 if (value == data32[highValueIndex]) { 212 cp = limit; 213 } 214 break; 215 } 216 217 if (index2Block == index2NullOffset) { 218 if (value != initialValue) { 219 break; 220 } 221 cp += UTRIE2_CP_PER_INDEX_1_ENTRY; 222 } else if (block == dataNullOffset) { 223 // The block at dataNullOffset has all values == initialValue. 224 // Because Trie2 iteration always proceeds in ascending order, we will always 225 // encounter a null block at its beginning, and can skip over 226 // a number of code points equal to the length of the block. 227 if (value != initialValue) { 228 break; 229 } 230 cp += UTRIE2_DATA_BLOCK_LENGTH; 231 } else { 232 // Current position refers to an ordinary data block. 233 // Walk over the data entries, checking the values. 234 int startIx = block + (cp & UTRIE2_DATA_MASK); 235 int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; 236 for (int ix = startIx; ix<limitIx; ix++) { 237 if (data32[ix] != value) { 238 // We came to an entry with a different value. 239 // We are done. 240 cp += (ix - startIx); 241 break outerLoop; 242 } 243 } 244 // The ordinary data block contained our value until its end. 245 // Advance the current code point, and continue the outer loop. 246 cp += limitIx - startIx; 247 } 248 } 249 if (cp > limit) { 250 cp = limit; 251 } 252 253 return cp - 1; 254 } 255 256} 257 258