Trie2_32.java revision 2ae130017183d2f66d55bf0ca51f8da3294644fd
1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/* 3 ******************************************************************************* 4 * Copyright (C) 2009-2014, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************************* 7 */ 8 9package android.icu.impl; 10 11import java.io.DataOutputStream; 12import java.io.IOException; 13import java.io.OutputStream; 14import java.nio.ByteBuffer; 15 16/** 17 * @author aheninger 18 * 19 * A read-only Trie2, holding 32 bit data values. 20 * 21 * A Trie2 is a highly optimized data structure for mapping from Unicode 22 * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. 23 * 24 * See class Trie2 for descriptions of the API for accessing the contents of a trie. 25 * 26 * The fundamental data access methods are declared final in this class, with 27 * the intent that applications might gain a little extra performance, when compared 28 * with calling the same methods via the abstract UTrie2 base class. 29 */ 30 31public class Trie2_32 extends Trie2 { 32 33 /** 34 * Internal constructor, not for general use. 35 */ 36 Trie2_32() { 37 } 38 39 40 /** 41 * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). 42 * The serialized format is identical between ICU4C and ICU4J, so this function 43 * will work with serialized Trie2s from either. 44 * 45 * The serialized Trie2 in the bytes may be in either little or big endian byte order. 46 * This allows using serialized Tries from ICU4C without needing to consider the 47 * byte order of the system that created them. 48 * 49 * @param bytes a byte buffer to the serialized form of a UTrie2. 50 * @return An unserialized Trie_32, ready for use. 51 * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. 52 * @throws IOException if a read error occurs in the buffer. 53 * @throws ClassCastException if the bytes contains a serialized Trie2_16 54 */ 55 public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException { 56 return (Trie2_32) Trie2.createFromSerialized(bytes); 57 } 58 59 /** 60 * Get the value for a code point as stored in the Trie2. 61 * 62 * @param codePoint the code point 63 * @return the value 64 */ 65 @Override 66 public final int get(int codePoint) { 67 int value; 68 int ix; 69 70 if (codePoint >= 0) { 71 if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { 72 // Ordinary BMP code point, excluding leading surrogates. 73 // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 74 // 32 bit data is stored in the index array itself. 75 ix = index[codePoint >> UTRIE2_SHIFT_2]; 76 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 77 value = data32[ix]; 78 return value; 79 } 80 if (codePoint <= 0xffff) { 81 // Lead Surrogate Code Point. A Separate index section is stored for 82 // lead surrogate code units and code points. 83 // The main index has the code unit data. 84 // For this function, we need the code point data. 85 // Note: this expression could be refactored for slightly improved efficiency, but 86 // surrogate code points will be so rare in practice that it's not worth it. 87 ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; 88 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 89 value = data32[ix]; 90 return value; 91 } 92 if (codePoint < highStart) { 93 // Supplemental code point, use two-level lookup. 94 ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); 95 ix = index[ix]; 96 ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; 97 ix = index[ix]; 98 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 99 value = data32[ix]; 100 return value; 101 } 102 if (codePoint <= 0x10ffff) { 103 value = data32[highValueIndex]; 104 return value; 105 } 106 } 107 108 // Fall through. The code point is outside of the legal range of 0..0x10ffff. 109 return errorValue; 110 } 111 112 113 /** 114 * Get a Trie2 value for a UTF-16 code unit. 115 * 116 * This function returns the same value as get() if the input 117 * character is outside of the lead surrogate range 118 * 119 * There are two values stored in a Trie2 for inputs in the lead 120 * surrogate range. This function returns the alternate value, 121 * while Trie2.get() returns the main value. 122 * 123 * @param codeUnit a 16 bit code unit or lead surrogate value. 124 * @return the value 125 */ 126 @Override 127 public int getFromU16SingleLead(char codeUnit){ 128 int value; 129 int ix; 130 131 ix = index[codeUnit >> UTRIE2_SHIFT_2]; 132 ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); 133 value = data32[ix]; 134 return value; 135 136 } 137 138 /** 139 * Serialize a Trie2_32 onto an OutputStream. 140 * 141 * A Trie2 can be serialized multiple times. 142 * The serialized data is compatible with ICU4C UTrie2 serialization. 143 * Trie2 serialization is unrelated to Java object serialization. 144 * 145 * @param os the stream to which the serialized Trie2 data will be written. 146 * @return the number of bytes written. 147 * @throw IOException on an error writing to the OutputStream. 148 */ 149 public int serialize(OutputStream os) throws IOException { 150 DataOutputStream dos = new DataOutputStream(os); 151 int bytesWritten = 0; 152 153 bytesWritten += serializeHeader(dos); 154 for (int i=0; i<dataLength; i++) { 155 dos.writeInt(data32[i]); 156 } 157 bytesWritten += dataLength*4; 158 return bytesWritten; 159 } 160 161 /** 162 * @return the number of bytes of the serialized trie 163 */ 164 public int getSerializedLength() { 165 return 16+header.indexLength*2+dataLength*4; 166 } 167 168 /** 169 * Given a starting code point, find the last in a range of code points, 170 * all with the same value. 171 * 172 * This function is part of the implementation of iterating over the 173 * Trie2's contents. 174 * @param startingCP The code point at which to begin looking. 175 * @return The last code point with the same value as the starting code point. 176 */ 177 @Override 178 int rangeEnd(int startingCP, int limit, int value) { 179 int cp = startingCP; 180 int block = 0; 181 int index2Block = 0; 182 183 // Loop runs once for each of 184 // - a partial data block 185 // - a reference to the null (default) data block. 186 // - a reference to the index2 null block 187 188 outerLoop: 189 for (;;) { 190 if (cp >= limit) { 191 break; 192 } 193 if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { 194 // Ordinary BMP code point, excluding leading surrogates. 195 // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 196 // 16 bit data is stored in the index array itself. 197 index2Block = 0; 198 block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; 199 } else if (cp < 0xffff) { 200 // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 201 index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; 202 block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; 203 } else if (cp < highStart) { 204 // Supplemental code point, use two-level lookup. 205 int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); 206 index2Block = index[ix]; 207 block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; 208 } else { 209 // Code point above highStart. 210 if (value == data32[highValueIndex]) { 211 cp = limit; 212 } 213 break; 214 } 215 216 if (index2Block == index2NullOffset) { 217 if (value != initialValue) { 218 break; 219 } 220 cp += UTRIE2_CP_PER_INDEX_1_ENTRY; 221 } else if (block == dataNullOffset) { 222 // The block at dataNullOffset has all values == initialValue. 223 // Because Trie2 iteration always proceeds in ascending order, we will always 224 // encounter a null block at its beginning, and can skip over 225 // a number of code points equal to the length of the block. 226 if (value != initialValue) { 227 break; 228 } 229 cp += UTRIE2_DATA_BLOCK_LENGTH; 230 } else { 231 // Current position refers to an ordinary data block. 232 // Walk over the data entries, checking the values. 233 int startIx = block + (cp & UTRIE2_DATA_MASK); 234 int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; 235 for (int ix = startIx; ix<limitIx; ix++) { 236 if (data32[ix] != value) { 237 // We came to an entry with a different value. 238 // We are done. 239 cp += (ix - startIx); 240 break outerLoop; 241 } 242 } 243 // The ordinary data block contained our value until its end. 244 // Advance the current code point, and continue the outer loop. 245 cp += limitIx - startIx; 246 } 247 } 248 if (cp > limit) { 249 cp = limit; 250 } 251 252 return cp - 1; 253 } 254 255} 256 257