1/*
2 ******************************************************************************
3 * Copyright (C) 1996-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7
8package com.ibm.icu.impl;
9
10import java.io.IOException;
11import java.nio.ByteBuffer;
12import java.util.Arrays;
13
14import com.ibm.icu.text.UTF16;
15
16/**
17 * Trie implementation which stores data in int, 32 bits.
18 * 2015-sep-03: Used only in CharsetSelector which could be switched to {@link Trie2_32}
19 * as long as that does not load ICU4C selector data.
20 *
21 * @author synwee
22 * @see com.ibm.icu.impl.Trie
23 * @since release 2.1, Jan 01 2002
24 */
25public class IntTrie extends Trie
26{
27    // public constructors ---------------------------------------------
28
29    /**
30    * <p>Creates a new Trie with the settings for the trie data.</p>
31    * <p>Unserialize the 32-bit-aligned input stream and use the data for the
32    * trie.</p>
33    * @param bytes file buffer to a ICU data file, containing the trie
34    * @param dataManipulate object which provides methods to parse the char
35    *                        data
36    * @throws IOException thrown when data reading fails
37    */
38    public IntTrie(ByteBuffer bytes, DataManipulate dataManipulate)
39                                                    throws IOException
40    {
41        super(bytes, dataManipulate);
42        if (!isIntTrie()) {
43            throw new IllegalArgumentException(
44                               "Data given does not belong to a int trie.");
45        }
46    }
47
48    /**
49     * Make a dummy IntTrie.
50     * A dummy trie is an empty runtime trie, used when a real data trie cannot
51     * be loaded.
52     *
53     * The trie always returns the initialValue,
54     * or the leadUnitValue for lead surrogate code points.
55     * The Latin-1 part is always set up to be linear.
56     *
57     * @param initialValue the initial value that is set for all code points
58     * @param leadUnitValue the value for lead surrogate code _units_ that do not
59     *                      have associated supplementary data
60     * @param dataManipulate object which provides methods to parse the char data
61     */
62    @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
63    public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
64        super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
65
66        int dataLength, latin1Length, i, limit;
67        char block;
68
69        /* calculate the actual size of the dummy trie data */
70
71        /* max(Latin-1, block 0) */
72        dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
73        if(leadUnitValue!=initialValue) {
74            dataLength+=DATA_BLOCK_LENGTH;
75        }
76        m_data_=new int[dataLength];
77        m_dataLength_=dataLength;
78
79        m_initialValue_=initialValue;
80
81        /* fill the index and data arrays */
82
83        /* indexes are preset to 0 (block 0) */
84
85        /* Latin-1 data */
86        for(i=0; i<latin1Length; ++i) {
87            m_data_[i]=initialValue;
88        }
89
90        if(leadUnitValue!=initialValue) {
91            /* indexes for lead surrogate code units to the block after Latin-1 */
92            block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
93            i=0xd800>>INDEX_STAGE_1_SHIFT_;
94            limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
95            for(; i<limit; ++i) {
96                m_index_[i]=block;
97            }
98
99            /* data for lead surrogate code units */
100            limit=latin1Length+DATA_BLOCK_LENGTH;
101            for(i=latin1Length; i<limit; ++i) {
102                m_data_[i]=leadUnitValue;
103            }
104        }
105    }
106
107    // public methods --------------------------------------------------
108
109    /**
110    * Gets the value associated with the codepoint.
111    * If no value is associated with the codepoint, a default value will be
112    * returned.
113    * @param ch codepoint
114    * @return offset to data
115    */
116    public final int getCodePointValue(int ch)
117    {
118        int offset;
119
120        // fastpath for U+0000..U+D7FF
121        if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
122            // copy of getRawOffset()
123            offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
124                    + (ch & INDEX_STAGE_3_MASK_);
125            return m_data_[offset];
126        }
127
128        // handle U+D800..U+10FFFF
129        offset = getCodePointOffset(ch);
130        return (offset >= 0) ? m_data_[offset] : m_initialValue_;
131    }
132
133    /**
134    * Gets the value to the data which this lead surrogate character points
135    * to.
136    * Returned data may contain folding offset information for the next
137    * trailing surrogate character.
138    * This method does not guarantee correct results for trail surrogates.
139    * @param ch lead surrogate character
140    * @return data value
141    */
142    public final int getLeadValue(char ch)
143    {
144        return m_data_[getLeadOffset(ch)];
145    }
146
147    /**
148    * Get the value associated with the BMP code point.
149    * Lead surrogate code points are treated as normal code points, with
150    * unfolded values that may differ from getLeadValue() results.
151    * @param ch the input BMP code point
152    * @return trie data value associated with the BMP codepoint
153    */
154    public final int getBMPValue(char ch)
155    {
156        return m_data_[getBMPOffset(ch)];
157    }
158
159    /**
160    * Get the value associated with a pair of surrogates.
161    * @param lead a lead surrogate
162    * @param trail a trail surrogate
163    */
164    public final int getSurrogateValue(char lead, char trail)
165    {
166        if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) {
167            throw new IllegalArgumentException(
168                "Argument characters do not form a supplementary character");
169        }
170        // get fold position for the next trail surrogate
171        int offset = getSurrogateOffset(lead, trail);
172
173        // get the real data from the folded lead/trail units
174        if (offset > 0) {
175            return m_data_[offset];
176        }
177
178        // return m_initialValue_ if there is an error
179        return m_initialValue_;
180    }
181
182    /**
183    * Get a value from a folding offset (from the value of a lead surrogate)
184    * and a trail surrogate.
185    * @param leadvalue the value of a lead surrogate that contains the
186    *        folding offset
187    * @param trail surrogate
188    * @return trie data value associated with the trail character
189    */
190    public final int getTrailValue(int leadvalue, char trail)
191    {
192        if (m_dataManipulate_ == null) {
193            throw new NullPointerException(
194                             "The field DataManipulate in this Trie is null");
195        }
196        int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
197        if (offset > 0) {
198            return m_data_[getRawOffset(offset,
199                                         (char)(trail & SURROGATE_MASK_))];
200        }
201        return m_initialValue_;
202    }
203
204    /**
205     * <p>Gets the latin 1 fast path value.</p>
206     * <p>Note this only works if latin 1 characters have their own linear
207     * array.</p>
208     * @param ch latin 1 characters
209     * @return value associated with latin character
210     */
211    public final int getLatin1LinearValue(char ch)
212    {
213        return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch];
214    }
215
216    /**
217     * Checks if the argument Trie has the same data as this Trie
218     * @param other Trie to check
219     * @return true if the argument Trie has the same data as this Trie, false
220     *         otherwise
221     */
222    ///CLOVER:OFF
223    public boolean equals(Object other)
224    {
225        boolean result = super.equals(other);
226        if (result && other instanceof IntTrie) {
227            IntTrie othertrie = (IntTrie)other;
228            if (m_initialValue_ != othertrie.m_initialValue_
229                || !Arrays.equals(m_data_, othertrie.m_data_)) {
230                return false;
231            }
232            return true;
233        }
234        return false;
235    }
236
237    public int hashCode() {
238        assert false : "hashCode not designed";
239        return 42;
240    }
241    ///CLOVER:ON
242
243    // protected methods -----------------------------------------------
244
245    /**
246    * <p>Parses the input stream and stores its trie content into a index and
247    * data array</p>
248    * @param bytes data buffer containing trie data
249    */
250    protected final void unserialize(ByteBuffer bytes)
251    {
252        super.unserialize(bytes);
253        // one used for initial value
254        m_data_ = ICUBinary.getInts(bytes, m_dataLength_, 0);
255        m_initialValue_ = m_data_[0];
256    }
257
258    /**
259    * Gets the offset to the data which the surrogate pair points to.
260    * @param lead lead surrogate
261    * @param trail trailing surrogate
262    * @return offset to data
263    */
264    protected final int getSurrogateOffset(char lead, char trail)
265    {
266        if (m_dataManipulate_ == null) {
267            throw new NullPointerException(
268                             "The field DataManipulate in this Trie is null");
269        }
270        // get fold position for the next trail surrogate
271        int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
272
273        // get the real data from the folded lead/trail units
274        if (offset > 0) {
275            return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
276        }
277
278        // return -1 if there is an error, in this case we return the default
279        // value: m_initialValue_
280        return -1;
281    }
282
283    /**
284    * Gets the value at the argument index.
285    * For use internally in TrieIterator
286    * @param index value at index will be retrieved
287    * @return 32 bit value
288    * @see com.ibm.icu.impl.TrieIterator
289    */
290    protected final int getValue(int index)
291    {
292      return m_data_[index];
293    }
294
295    /**
296    * Gets the default initial value
297    * @return 32 bit value
298    */
299    protected final int getInitialValue()
300    {
301        return m_initialValue_;
302    }
303
304    // package private methods -----------------------------------------
305
306    /**
307     * Internal constructor for builder use
308     * @param index the index array to be slotted into this trie
309     * @param data the data array to be slotted into this trie
310     * @param initialvalue the initial value for this trie
311     * @param options trie options to use
312     * @param datamanipulate folding implementation
313     */
314    IntTrie(char index[], int data[], int initialvalue, int options,
315            DataManipulate datamanipulate)
316    {
317        super(index, options, datamanipulate);
318        m_data_ = data;
319        m_dataLength_ = m_data_.length;
320        m_initialValue_ = initialvalue;
321    }
322
323    // private data members --------------------------------------------
324
325    /**
326    * Default value
327    */
328    private int m_initialValue_;
329    /**
330    * Array of char data
331    */
332    private int m_data_[];
333}
334