Trie2_32.java revision 2ae130017183d2f66d55bf0ca51f8da3294644fd
1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3 *******************************************************************************
4 * Copyright (C) 2009-2014, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 *******************************************************************************
7 */
8
9package android.icu.impl;
10
11import java.io.DataOutputStream;
12import java.io.IOException;
13import java.io.OutputStream;
14import java.nio.ByteBuffer;
15
16/**
17 * @author aheninger
18 *
19 * A read-only Trie2, holding 32 bit data values.
20 *
21 * A Trie2 is a highly optimized data structure for mapping from Unicode
22 * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value.
23 *
24 * See class Trie2 for descriptions of the API for accessing the contents of a trie.
25 *
26 * The fundamental data access methods are declared final in this class, with
27 * the intent that applications might gain a little extra performance, when compared
28 * with calling the same methods via the abstract UTrie2 base class.
29 */
30
31public class Trie2_32 extends Trie2 {
32
33    /**
34     * Internal constructor, not for general use.
35     */
36    Trie2_32() {
37    }
38
39
40    /**
41     * Create a Trie2 from its serialized form.  Inverse of utrie2_serialize().
42     * The serialized format is identical between ICU4C and ICU4J, so this function
43     * will work with serialized Trie2s from either.
44     *
45     * The serialized Trie2 in the bytes may be in either little or big endian byte order.
46     * This allows using serialized Tries from ICU4C without needing to consider the
47     * byte order of the system that created them.
48     *
49     * @param bytes a byte buffer to the serialized form of a UTrie2.
50     * @return An unserialized Trie_32, ready for use.
51     * @throws IllegalArgumentException if the stream does not contain a serialized Trie2.
52     * @throws IOException if a read error occurs in the buffer.
53     * @throws ClassCastException if the bytes contains a serialized Trie2_16
54     */
55    public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException {
56        return (Trie2_32) Trie2.createFromSerialized(bytes);
57    }
58
59    /**
60     * Get the value for a code point as stored in the Trie2.
61     *
62     * @param codePoint the code point
63     * @return the value
64     */
65    @Override
66    public final int get(int codePoint) {
67        int value;
68        int ix;
69
70        if (codePoint >= 0) {
71            if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
72                // Ordinary BMP code point, excluding leading surrogates.
73                // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
74                // 32 bit data is stored in the index array itself.
75                ix = index[codePoint >> UTRIE2_SHIFT_2];
76                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
77                value = data32[ix];
78                return value;
79            }
80            if (codePoint <= 0xffff) {
81                // Lead Surrogate Code Point.  A Separate index section is stored for
82                // lead surrogate code units and code points.
83                //   The main index has the code unit data.
84                //   For this function, we need the code point data.
85                // Note: this expression could be refactored for slightly improved efficiency, but
86                //       surrogate code points will be so rare in practice that it's not worth it.
87                ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
88                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
89                value = data32[ix];
90                return value;
91            }
92            if (codePoint < highStart) {
93                // Supplemental code point, use two-level lookup.
94                ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1);
95                ix = index[ix];
96                ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
97                ix = index[ix];
98                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
99                value = data32[ix];
100                return value;
101            }
102            if (codePoint <= 0x10ffff) {
103                value = data32[highValueIndex];
104                return value;
105            }
106        }
107
108        // Fall through.  The code point is outside of the legal range of 0..0x10ffff.
109        return errorValue;
110    }
111
112
113    /**
114     * Get a Trie2 value for a UTF-16 code unit.
115     *
116     * This function returns the same value as get() if the input
117     * character is outside of the lead surrogate range
118     *
119     * There are two values stored in a Trie2 for inputs in the lead
120     * surrogate range.  This function returns the alternate value,
121     * while Trie2.get() returns the main value.
122     *
123     * @param codeUnit a 16 bit code unit or lead surrogate value.
124     * @return the value
125     */
126    @Override
127    public int getFromU16SingleLead(char codeUnit){
128        int value;
129        int ix;
130
131        ix = index[codeUnit >> UTRIE2_SHIFT_2];
132        ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK);
133        value = data32[ix];
134        return value;
135
136    }
137
138    /**
139     * Serialize a Trie2_32 onto an OutputStream.
140     *
141     * A Trie2 can be serialized multiple times.
142     * The serialized data is compatible with ICU4C UTrie2 serialization.
143     * Trie2 serialization is unrelated to Java object serialization.
144     *
145     * @param os the stream to which the serialized Trie2 data will be written.
146     * @return the number of bytes written.
147     * @throw IOException on an error writing to the OutputStream.
148     */
149    public int serialize(OutputStream os) throws IOException {
150        DataOutputStream dos = new DataOutputStream(os);
151        int  bytesWritten = 0;
152
153        bytesWritten += serializeHeader(dos);
154        for (int i=0; i<dataLength; i++) {
155            dos.writeInt(data32[i]);
156        }
157        bytesWritten += dataLength*4;
158        return bytesWritten;
159    }
160
161    /**
162     * @return the number of bytes of the serialized trie
163     */
164    public int getSerializedLength() {
165        return 16+header.indexLength*2+dataLength*4;
166    }
167
168    /**
169     * Given a starting code point, find the last in a range of code points,
170     * all with the same value.
171     *
172     * This function is part of the implementation of iterating over the
173     * Trie2's contents.
174     * @param startingCP The code point at which to begin looking.
175     * @return The last code point with the same value as the starting code point.
176     */
177    @Override
178    int rangeEnd(int startingCP, int limit, int value) {
179        int   cp = startingCP;
180        int   block = 0;
181        int   index2Block = 0;
182
183        // Loop runs once for each of
184        //   - a partial data block
185        //   - a reference to the null (default) data block.
186        //   - a reference to the index2 null block
187
188      outerLoop:
189        for (;;) {
190            if (cp >= limit) {
191                break;
192            }
193            if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) {
194                // Ordinary BMP code point, excluding leading surrogates.
195                // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
196                // 16 bit data is stored in the index array itself.
197                index2Block = 0;
198                block       = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT;
199            } else if (cp < 0xffff) {
200                // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00
201                index2Block = UTRIE2_LSCP_INDEX_2_OFFSET;
202                block       = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT;
203            } else if (cp < highStart) {
204                // Supplemental code point, use two-level lookup.
205                int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1);
206                index2Block = index[ix];
207                block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT;
208            } else  {
209                // Code point above highStart.
210                if (value == data32[highValueIndex]) {
211                    cp = limit;
212                }
213                break;
214            }
215
216            if (index2Block == index2NullOffset) {
217                if (value != initialValue) {
218                    break;
219                }
220                cp += UTRIE2_CP_PER_INDEX_1_ENTRY;
221            } else if (block == dataNullOffset) {
222                // The block at dataNullOffset has all values == initialValue.
223                // Because Trie2 iteration always proceeds in ascending order, we will always
224                //   encounter a null block at its beginning, and can skip over
225                //   a number of code points equal to the length of the block.
226                if (value != initialValue) {
227                    break;
228                }
229                cp += UTRIE2_DATA_BLOCK_LENGTH;
230            } else {
231                // Current position refers to an ordinary data block.
232                // Walk over the data entries, checking the values.
233                int startIx = block + (cp & UTRIE2_DATA_MASK);
234                int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH;
235                for (int ix = startIx; ix<limitIx; ix++) {
236                    if (data32[ix] != value) {
237                        // We came to an entry with a different value.
238                        //   We are done.
239                        cp += (ix - startIx);
240                        break outerLoop;
241                    }
242                }
243                // The ordinary data block contained our value until its end.
244                //  Advance the current code point, and continue the outer loop.
245                cp += limitIx - startIx;
246            }
247        }
248        if (cp > limit) {
249            cp = limit;
250        }
251
252        return cp - 1;
253    }
254
255}
256
257