1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3******************************************************************************
4* Copyright (C) 1996-2015, International Business Machines Corporation and
5* others. All Rights Reserved.
6******************************************************************************
7*/
8
9package android.icu.impl;
10
11import java.util.NoSuchElementException;
12
13import android.icu.lang.UCharacter;
14import android.icu.text.UTF16;
15import android.icu.util.RangeValueIterator;
16
17/**
18 * <p>Class enabling iteration of the values in a Trie.</p>
19 *
20 * <p>2015-sep-03 TODO: Only used in test code, move there.
21 *
22 * <p>Result of each iteration contains the interval of codepoints that have
23 * the same value type and the value type itself.</p>
24 * <p>The comparison of each codepoint value is done via extract(), which the
25 * default implementation is to return the value as it is.</p>
26 * <p>Method extract() can be overwritten to perform manipulations on
27 * codepoint values in order to perform specialized comparison.</p>
28 * <p>TrieIterator is designed to be a generic iterator for the CharTrie
29 * and the IntTrie, hence to accommodate both types of data, the return
30 * result will be in terms of int (32 bit) values.</p>
31 * <p>See android.icu.text.UCharacterTypeIterator for examples of use.</p>
32 * <p>Notes for porting utrie_enum from icu4c to icu4j:<br>
33 * Internally, icu4c's utrie_enum performs all iterations in its body. In Java
34 * sense, the caller will have to pass a object with a callback function
35 * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit,
36 * uint32_t value) into utrie_enum. utrie_enum will then find ranges of
37 * codepoints with the same value as determined by
38 * UTrieEnumValue(const void *context, uint32_t value). for each range,
39 * utrie_enum calls the callback function to perform a task. In this way,
40 * icu4c performs the iteration within utrie_enum.
41 * To follow the JDK model, icu4j is slightly different from icu4c.
42 * Instead of requesting the caller to implement an object for a callback.
43 * The caller will have to implement a subclass of TrieIterator, fleshing out
44 * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j,
45 * the caller will have to code his own iteration and flesh out the task
46 * (equivalent to UTrieEnumRange) to be performed in the iteration loop.
47 * </p>
48 * <p>There are basically 3 usage scenarios for porting:</p>
49 * <p>1) UTrieEnumValue is the only implemented callback then just implement a
50 * subclass of TrieIterator and override the extract(int) method. The
51 * extract(int) method is analogus to UTrieEnumValue callback.
52 * </p>
53 * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement
54 * a subclass of TrieIterator, override the extract method and iterate, e.g
55 * </p>
56 * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange,
57 *               set);<br>
58 * In Java :<br>
59 * <pre>
60 * class TrieIteratorImpl extends TrieIterator{
61 *     public TrieIteratorImpl(Trie data){
62 *         super(data);
63 *     }
64 *     public int extract(int value){
65 *         // port the implementation of _enumPropertyStartsValue here
66 *     }
67 * }
68 * ....
69 * TrieIterator fcdIter  = new TrieIteratorImpl(fcdTrieImpl.fcdTrie);
70 * while(fcdIter.next(result)) {
71 *     // port the implementation of _enumPropertyStartsRange
72 * }
73 * </pre>
74 * </p>
75 * <p>3) UTrieEnumRange is the only implemented callback then just implement
76 * the while loop, when utrie_enum is called
77 * <pre>
78 * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
79 * TrieIterator fcdIter  = new TrieIterator(fcdTrieImpl.fcdTrie);
80 * while(fcdIter.next(result)){
81 *     set.add(result.start);
82 * }
83 * </p>
84 * @author synwee
85 * @see android.icu.impl.Trie
86 * @hide Only a subset of ICU is exposed in Android
87 */
88public class TrieIterator implements RangeValueIterator
89
90{
91    // public constructor ---------------------------------------------
92
93    /**
94    * TrieEnumeration constructor
95    * @param trie to be used
96    * @exception IllegalArgumentException throw when argument is null.
97    */
98    public TrieIterator(Trie trie)
99    {
100        if (trie == null) {
101            throw new IllegalArgumentException(
102                                          "Argument trie cannot be null");
103        }
104        m_trie_             = trie;
105        // synwee: check that extract belongs to the child class
106        m_initialValue_     = extract(m_trie_.getInitialValue());
107        reset();
108    }
109
110    // public methods -------------------------------------------------
111
112    /**
113    * <p>Returns true if we are not at the end of the iteration, false
114    * otherwise.</p>
115    * <p>The next set of codepoints with the same value type will be
116    * calculated during this call and returned in the arguement element.</p>
117    * @param element return result
118    * @return true if we are not at the end of the iteration, false otherwise.
119    * @exception NoSuchElementException - if no more elements exist.
120    * @see android.icu.util.RangeValueIterator.Element
121    */
122    public final boolean next(Element element)
123    {
124        if (m_nextCodepoint_ > UCharacter.MAX_VALUE) {
125            return false;
126        }
127        if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE &&
128            calculateNextBMPElement(element)) {
129            return true;
130        }
131        calculateNextSupplementaryElement(element);
132        return true;
133    }
134
135    /**
136    * Resets the iterator to the beginning of the iteration
137    */
138    public final void reset()
139    {
140        m_currentCodepoint_ = 0;
141        m_nextCodepoint_    = 0;
142        m_nextIndex_        = 0;
143        m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_;
144        if (m_nextBlock_ == m_trie_.m_dataOffset_) {
145            m_nextValue_ = m_initialValue_;
146        }
147        else {
148            m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_));
149        }
150        m_nextBlockIndex_ = 0;
151        m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_;
152    }
153
154    // protected methods ----------------------------------------------
155
156    /**
157    * Called by next() to extracts a 32 bit value from a trie value
158    * used for comparison.
159    * This method is to be overwritten if special manipulation is to be done
160    * to retrieve a relevant comparison.
161    * The default function is to return the value as it is.
162    * @param value a value from the trie
163    * @return extracted value
164    */
165    protected int extract(int value)
166    {
167        return value;
168    }
169
170    // private methods ------------------------------------------------
171
172    /**
173    * Set the result values
174    * @param element return result object
175    * @param start codepoint of range
176    * @param limit (end + 1) codepoint of range
177    * @param value common value of range
178    */
179    private final void setResult(Element element, int start, int limit,
180                                 int value)
181    {
182        element.start = start;
183        element.limit = limit;
184        element.value = value;
185    }
186
187    /**
188    * Finding the next element.
189    * This method is called just before returning the result of
190    * next().
191    * We always store the next element before it is requested.
192    * In the case that we have to continue calculations into the
193    * supplementary planes, a false will be returned.
194    * @param element return result object
195    * @return true if the next range is found, false if we have to proceed to
196    *         the supplementary range.
197    */
198    private final boolean calculateNextBMPElement(Element element)
199    {
200        int currentValue    = m_nextValue_;
201        m_currentCodepoint_ = m_nextCodepoint_;
202        m_nextCodepoint_ ++;
203        m_nextBlockIndex_ ++;
204        if (!checkBlockDetail(currentValue)) {
205            setResult(element, m_currentCodepoint_, m_nextCodepoint_,
206                      currentValue);
207            return true;
208        }
209        // synwee check that next block index == 0 here
210        // enumerate BMP - the main loop enumerates data blocks
211        while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) {
212            // because of the way the character is split to form the index
213            // the lead surrogate and trail surrogate can not be in the
214            // mid of a block
215            if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) {
216                // skip lead surrogate code units,
217                // go to lead surrogate codepoints
218                m_nextIndex_ = BMP_INDEX_LENGTH_;
219            }
220            else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) {
221                // go back to regular BMP code points
222                m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_;
223            } else {
224                m_nextIndex_ ++;
225            }
226
227            m_nextBlockIndex_ = 0;
228            if (!checkBlock(currentValue)) {
229                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
230                          currentValue);
231                return true;
232            }
233        }
234        m_nextCodepoint_ --;   // step one back since this value has not been
235        m_nextBlockIndex_ --;  // retrieved yet.
236        return false;
237    }
238
239    /**
240    * Finds the next supplementary element.
241    * For each entry in the trie, the value to be delivered is passed through
242    * extract().
243    * We always store the next element before it is requested.
244    * Called after calculateNextBMP() completes its round of BMP characters.
245    * There is a slight difference in the usage of m_currentCodepoint_
246    * here as compared to calculateNextBMP(). Though both represents the
247    * lower bound of the next element, in calculateNextBMP() it gets set
248    * at the start of any loop, where-else, in calculateNextSupplementary()
249    * since m_currentCodepoint_ already contains the lower bound of the
250    * next element (passed down from calculateNextBMP()), we keep it till
251    * the end before resetting it to the new value.
252    * Note, if there are no more iterations, it will never get to here.
253    * Blocked out by next().
254    * @param element return result object
255    */
256    private final void calculateNextSupplementaryElement(Element element)
257    {
258        int currentValue = m_nextValue_;
259        m_nextCodepoint_ ++;
260        m_nextBlockIndex_ ++;
261
262        if (UTF16.getTrailSurrogate(m_nextCodepoint_)
263                                        != UTF16.TRAIL_SURROGATE_MIN_VALUE) {
264            // this piece is only called when we are in the middle of a lead
265            // surrogate block
266            if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) {
267                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
268                          currentValue);
269                m_currentCodepoint_ = m_nextCodepoint_;
270                return;
271            }
272            // we have cleared one block
273            m_nextIndex_ ++;
274            m_nextTrailIndexOffset_ ++;
275            if (!checkTrailBlock(currentValue)) {
276                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
277                          currentValue);
278                m_currentCodepoint_ = m_nextCodepoint_;
279                return;
280            }
281        }
282        int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
283        // enumerate supplementary code points
284        while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) {
285            // lead surrogate access
286            final int leadBlock =
287                   m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
288                                                   Trie.INDEX_STAGE_2_SHIFT_;
289            if (leadBlock == m_trie_.m_dataOffset_) {
290                // no entries for a whole block of lead surrogates
291                if (currentValue != m_initialValue_) {
292                    m_nextValue_      = m_initialValue_;
293                    m_nextBlock_      = leadBlock;  // == m_trie_.m_dataOffset_
294                    m_nextBlockIndex_ = 0;
295                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
296                              currentValue);
297                    m_currentCodepoint_ = m_nextCodepoint_;
298                    return;
299                }
300
301                nextLead += DATA_BLOCK_LENGTH_;
302                // number of total affected supplementary codepoints in one
303                // block
304                // this is not a simple addition of
305                // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider
306                // that we might have moved some of the codepoints
307                m_nextCodepoint_ = Character.toCodePoint((char)nextLead, (char)UTF16.TRAIL_SURROGATE_MIN_VALUE);
308                continue;
309            }
310            if (m_trie_.m_dataManipulate_ == null) {
311                throw new NullPointerException(
312                            "The field DataManipulate in this Trie is null");
313            }
314            // enumerate trail surrogates for this lead surrogate
315            m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
316                               m_trie_.getValue(leadBlock +
317                                   (nextLead & Trie.INDEX_STAGE_3_MASK_)));
318            if (m_nextIndex_ <= 0) {
319                // no data for this lead surrogate
320                if (currentValue != m_initialValue_) {
321                    m_nextValue_      = m_initialValue_;
322                    m_nextBlock_      = m_trie_.m_dataOffset_;
323                    m_nextBlockIndex_ = 0;
324                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
325                              currentValue);
326                    m_currentCodepoint_ = m_nextCodepoint_;
327                    return;
328                }
329                m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_;
330            } else {
331                m_nextTrailIndexOffset_ = 0;
332                if (!checkTrailBlock(currentValue)) {
333                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
334                              currentValue);
335                    m_currentCodepoint_ = m_nextCodepoint_;
336                    return;
337                }
338            }
339            nextLead ++;
340         }
341
342         // deliver last range
343         setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1,
344                   currentValue);
345    }
346
347    /**
348    * Internal block value calculations
349    * Performs calculations on a data block to find codepoints in m_nextBlock_
350    * after the index m_nextBlockIndex_ that has the same value.
351    * Note m_*_ variables at this point is the next codepoint whose value
352    * has not been calculated.
353    * But when returned with false, it will be the last codepoint whose
354    * value has been calculated.
355    * @param currentValue the value which other codepoints are tested against
356    * @return true if the whole block has the same value as currentValue or if
357    *              the whole block has been calculated, false otherwise.
358    */
359    private final boolean checkBlockDetail(int currentValue)
360    {
361        while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) {
362            m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ +
363                                                    m_nextBlockIndex_));
364            if (m_nextValue_ != currentValue) {
365                return false;
366            }
367            ++ m_nextBlockIndex_;
368            ++ m_nextCodepoint_;
369        }
370        return true;
371    }
372
373    /**
374    * Internal block value calculations
375    * Performs calculations on a data block to find codepoints in m_nextBlock_
376    * that has the same value.
377    * Will call checkBlockDetail() if highlevel check fails.
378    * Note m_*_ variables at this point is the next codepoint whose value
379    * has not been calculated.
380    * @param currentBlock the initial block containing all currentValue
381    * @param currentValue the value which other codepoints are tested against
382    * @return true if the whole block has the same value as currentValue or if
383    *              the whole block has been calculated, false otherwise.
384    */
385    private final boolean checkBlock(int currentValue)
386    {
387        int currentBlock = m_nextBlock_;
388        m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] <<
389                                                  Trie.INDEX_STAGE_2_SHIFT_;
390        if (m_nextBlock_ == currentBlock &&
391            (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) {
392            // the block is the same as the previous one, filled with
393            // currentValue
394            m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
395        }
396        else if (m_nextBlock_ == m_trie_.m_dataOffset_) {
397            // this is the all-initial-value block
398            if (currentValue != m_initialValue_) {
399                m_nextValue_      = m_initialValue_;
400                m_nextBlockIndex_ = 0;
401                return false;
402            }
403            m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
404        }
405        else {
406            if (!checkBlockDetail(currentValue)) {
407                return false;
408            }
409        }
410        return true;
411    }
412
413    /**
414    * Internal block value calculations
415    * Performs calculations on multiple data blocks for a set of trail
416    * surrogates to find codepoints in m_nextBlock_ that has the same value.
417    * Will call checkBlock() for internal block checks.
418    * Note m_*_ variables at this point is the next codepoint whose value
419    * has not been calculated.
420    * @param currentValue the value which other codepoints are tested against
421    * @return true if the whole block has the same value as currentValue or if
422    *              the whole block has been calculated, false otherwise.
423    */
424    private final boolean checkTrailBlock(int currentValue)
425    {
426        // enumerate code points for this lead surrogate
427        while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_)
428        {
429            // if we ever reach here, we are at the start of a new block
430            m_nextBlockIndex_ = 0;
431            // copy of most of the body of the BMP loop
432            if (!checkBlock(currentValue)) {
433                return false;
434            }
435            m_nextTrailIndexOffset_ ++;
436            m_nextIndex_ ++;
437        }
438        return true;
439    }
440
441    /**
442    * Checks if we are beginning at the start of a initial block.
443    * If we are then the rest of the codepoints in this initial block
444    * has the same values.
445    * We increment m_nextCodepoint_ and relevant data members if so.
446    * This is used only in for the supplementary codepoints because
447    * the offset to the trail indexes could be 0.
448    * @return true if we are at the start of a initial block.
449    */
450    private final boolean checkNullNextTrailIndex()
451    {
452        if (m_nextIndex_ <= 0) {
453            m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
454            int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
455            int leadBlock =
456                   m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
457                                                   Trie.INDEX_STAGE_2_SHIFT_;
458            if (m_trie_.m_dataManipulate_ == null) {
459                throw new NullPointerException(
460                            "The field DataManipulate in this Trie is null");
461            }
462            m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
463                               m_trie_.getValue(leadBlock +
464                                   (nextLead & Trie.INDEX_STAGE_3_MASK_)));
465            m_nextIndex_ --;
466            m_nextBlockIndex_ =  DATA_BLOCK_LENGTH_;
467            return true;
468        }
469        return false;
470    }
471
472    // private data members --------------------------------------------
473
474    /**
475    * Size of the stage 1 BMP indexes
476    */
477    private static final int BMP_INDEX_LENGTH_ =
478                                        0x10000 >> Trie.INDEX_STAGE_1_SHIFT_;
479    /**
480    * Lead surrogate minimum value
481    */
482    private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800;
483    /**
484    * Trail surrogate minimum value
485    */
486    private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00;
487    /*
488    * Trail surrogate maximum value
489    */
490    //private static final int TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF;
491    /**
492    * Number of trail surrogate
493    */
494    private static final int TRAIL_SURROGATE_COUNT_ = 0x400;
495    /**
496    * Number of stage 1 indexes for supplementary calculations that maps to
497    * each lead surrogate character.
498    * See second pass into getRawOffset for the trail surrogate character.
499    * 10 for significant number of bits for trail surrogates, 5 for what we
500    * discard during shifting.
501    */
502    private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ =
503                                    1 << (10 - Trie.INDEX_STAGE_1_SHIFT_);
504    /**
505    * Number of data values in a stage 2 (data array) block.
506    */
507    private static final int DATA_BLOCK_LENGTH_ =
508                                              1 << Trie.INDEX_STAGE_1_SHIFT_;
509//    /**
510//    * Number of codepoints in a stage 2 block
511//    */
512//    private static final int DATA_BLOCK_SUPPLEMENTARY_LENGTH_ =
513//                                                     DATA_BLOCK_LENGTH_ << 10;
514    /**
515    * Trie instance
516    */
517    private Trie m_trie_;
518    /**
519    * Initial value for trie values
520    */
521    private int m_initialValue_;
522    /**
523    * Next element results and data.
524    */
525    private int m_currentCodepoint_;
526    private int m_nextCodepoint_;
527    private int m_nextValue_;
528    private int m_nextIndex_;
529    private int m_nextBlock_;
530    private int m_nextBlockIndex_;
531    private int m_nextTrailIndexOffset_;
532}
533