1/*
2******************************************************************************
3* Copyright (C) 1996-2011, International Business Machines Corporation and   *
4* others. All Rights Reserved.                                               *
5******************************************************************************
6*/
7
8package com.ibm.icu.lang;
9
10import com.ibm.icu.impl.UCharacterName;
11import com.ibm.icu.impl.UCharacterNameChoice;
12import com.ibm.icu.util.ValueIterator;
13
14/**
15 * <p>Class enabling iteration of the codepoints and their names.</p>
 * <p>Result of each iteration contains a valid codepoint that has a valid
 * name.</p>
18 * <p>See UCharacter.getNameIterator() for an example of use.</p>
19 * @author synwee
20 * @since release 2.1, March 5 2002
21 */
22class UCharacterNameIterator implements ValueIterator
23{
24    // public methods ----------------------------------------------------
25
26    /**
27    * <p>Gets the next result for this iteration and returns
28    * true if we are not at the end of the iteration, false otherwise.</p>
29    * <p>If the return boolean is a false, the contents of elements will not
30    * be updated.</p>
31    * @param element for storing the result codepoint and name
32    * @return true if we are not at the end of the iteration, false otherwise.
33    * @see com.ibm.icu.util.ValueIterator.Element
34    */
35    public boolean next(ValueIterator.Element element)
36    {
37        if (m_current_ >= m_limit_) {
38            return false;
39        }
40
41        if (m_choice_ == UCharacterNameChoice.UNICODE_CHAR_NAME ||
42            m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME
43        ) {
44            int length = m_name_.getAlgorithmLength();
45            if (m_algorithmIndex_ < length) {
46                while (m_algorithmIndex_ < length) {
47                    // find the algorithm range that could contain m_current_
48                    if (m_algorithmIndex_ < 0 ||
49                        m_name_.getAlgorithmEnd(m_algorithmIndex_) <
50                        m_current_) {
51                        m_algorithmIndex_ ++;
52                    }
53                    else {
54                        break;
55                    }
56                }
57
58                if (m_algorithmIndex_ < length) {
59                    // interleave the data-driven ones with the algorithmic ones
60                    // iterate over all algorithmic ranges; assume that they are
61                    // in ascending order
62                    int start = m_name_.getAlgorithmStart(m_algorithmIndex_);
63                    if (m_current_ < start) {
64                        // this should get rid of those codepoints that are not
65                        // in the algorithmic range
66                        int end = start;
67                        if (m_limit_ <= start) {
68                            end = m_limit_;
69                        }
70                        if (!iterateGroup(element, end)) {
71                            m_current_ ++;
72                            return true;
73                        }
74                    }
75                    /*
76                    // "if (m_current_ >= m_limit_)" would not return true
77                    // because it can never be reached due to:
78                    // 1) It has already been checked earlier
79                    // 2) When m_current_ is updated earlier, it returns true
80                    // 3) No updates on m_limit_*/
81                    if (m_current_ >= m_limit_) {
82                        // after iterateGroup fails, current codepoint may be
83                        // greater than limit
84                        return false;
85                    }
86
87                    element.integer = m_current_;
88                    element.value   = m_name_.getAlgorithmName(m_algorithmIndex_,
89                                                                   m_current_);
90                    // reset the group index if we are in the algorithmic names
91                    m_groupIndex_ = -1;
92                    m_current_ ++;
93                    return true;
94                }
95            }
96        }
97        // enumerate the character names after the last algorithmic range
98        if (!iterateGroup(element, m_limit_)) {
99            m_current_ ++;
100            return true;
101        }
102        else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
103            if (!iterateExtended(element, m_limit_)) {
104                m_current_ ++;
105                return true;
106            }
107        }
108
109        return false;
110    }
111
112    /**
113    * <p>Resets the iterator to start iterating from the integer index
114    * UCharacter.MIN_VALUE or X if a setRange(X, Y) has been called previously.
115    * </p>
116    */
117    public void reset()
118    {
119        m_current_        = m_start_;
120        m_groupIndex_     = -1;
121        m_algorithmIndex_ = -1;
122    }
123
124    /**
125     * <p>Restricts the range of integers to iterate and resets the iteration
126     * to begin at the index argument start.</p>
127     * <p>If setRange(start, end) is not performed before next(element) is
128     * called, the iteration will start from the integer index
129     * UCharacter.MIN_VALUE and end at UCharacter.MAX_VALUE.</p>
130     * <p>
131     * If this range is set outside the range of UCharacter.MIN_VALUE and
132     * UCharacter.MAX_VALUE, next(element) will always return false.
133     * </p>
134     * @param start first integer in range to iterate
135     * @param limit 1 integer after the last integer in range
136     * @exception IllegalArgumentException thrown when attempting to set an
137     *            illegal range. E.g limit <= start
138     */
139    public void setRange(int start, int limit)
140    {
141        if (start >= limit) {
142            throw new IllegalArgumentException(
143                "start or limit has to be valid Unicode codepoints and start < limit");
144        }
145        if (start < UCharacter.MIN_VALUE) {
146            m_start_ = UCharacter.MIN_VALUE;
147        }
148        else {
149            m_start_ = start;
150        }
151
152        if (limit > UCharacter.MAX_VALUE + 1) {
153            m_limit_ = UCharacter.MAX_VALUE + 1;
154        }
155        else {
156            m_limit_ = limit;
157        }
158        m_current_ = m_start_;
159    }
160
161    // protected constructor ---------------------------------------------
162
163    /**
164    * Constructor
165    * @param name name data
166    * @param choice name choice from the class
167    *               com.ibm.icu.lang.UCharacterNameChoice
168    */
169    protected UCharacterNameIterator(UCharacterName name, int choice)
170    {
171        if(name==null){
172            throw new IllegalArgumentException("UCharacterName name argument cannot be null. Missing unames.icu?");
173        }
174        m_name_    = name;
175        // no explicit choice in UCharacter so no checks on choice
176        m_choice_  = choice;
177        m_start_   = UCharacter.MIN_VALUE;
178        m_limit_   = UCharacter.MAX_VALUE + 1;
179        m_current_ = m_start_;
180    }
181
182    // private data members ---------------------------------------------
183
184    /**
185     * Name data
186     */
187    private UCharacterName m_name_;
188    /**
189     * Name choice
190     */
191    private int m_choice_;
192     /**
193     * Start iteration range
194     */
195    private int m_start_;
196    /**
197     * End + 1 iteration range
198     */
199    private int m_limit_;
200    /**
201     * Current codepoint
202     */
203    private int m_current_;
204    /**
205     * Group index
206     */
207    private int m_groupIndex_ = -1;
208    /**
209     * Algorithm index
210     */
211    private int m_algorithmIndex_ = -1;
212    /**
213    * Group use
214    */
215    private static char GROUP_OFFSETS_[] =
216                                new char[UCharacterName.LINES_PER_GROUP_ + 1];
217    private static char GROUP_LENGTHS_[] =
218                                new char[UCharacterName.LINES_PER_GROUP_ + 1];
219
220    // private methods --------------------------------------------------
221
222    /**
223     * Group name iteration, iterate all the names in the current 32-group and
224     * returns the first codepoint that has a valid name.
225     * @param result stores the result codepoint and name
226     * @param limit last codepoint + 1 in range to search
227     * @return false if a codepoint with a name is found in group and we can
228     *         bail from further iteration, true to continue on with the
229     *         iteration
230     */
231    private boolean iterateSingleGroup(ValueIterator.Element result, int limit)
232    {
233        synchronized(GROUP_OFFSETS_) {
234        synchronized(GROUP_LENGTHS_) {
235            int index = m_name_.getGroupLengths(m_groupIndex_, GROUP_OFFSETS_,
236                                                GROUP_LENGTHS_);
237            while (m_current_ < limit) {
238                int    offset = UCharacterName.getGroupOffset(m_current_);
239                String name   = m_name_.getGroupName(
240                                          index + GROUP_OFFSETS_[offset],
241                                          GROUP_LENGTHS_[offset], m_choice_);
242                if ((name == null || name.length() == 0) &&
243                    m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
244                    name = m_name_.getExtendedName(m_current_);
245                }
246                if (name != null && name.length() > 0) {
247                    result.integer = m_current_;
248                    result.value   = name;
249                    return false;
250                }
251                ++ m_current_;
252            }
253        }
254        }
255        return true;
256    }
257
258    /**
259     * Group name iteration, iterate all the names in the current 32-group and
260     * returns the first codepoint that has a valid name.
261     * @param result stores the result codepoint and name
262     * @param limit last codepoint + 1 in range to search
263     * @return false if a codepoint with a name is found in group and we can
264     *         bail from further iteration, true to continue on with the
265     *         iteration
266     */
267    private boolean iterateGroup(ValueIterator.Element result, int limit)
268    {
269        if (m_groupIndex_ < 0) {
270            m_groupIndex_ = m_name_.getGroup(m_current_);
271        }
272
273        while (m_groupIndex_ < m_name_.m_groupcount_ &&
274               m_current_ < limit) {
275            // iterate till the last group or the last codepoint
276            int startMSB = UCharacterName.getCodepointMSB(m_current_);
277            int gMSB     = m_name_.getGroupMSB(m_groupIndex_); // can be -1
278            if (startMSB == gMSB) {
279                if (startMSB == UCharacterName.getCodepointMSB(limit - 1)) {
280                    // if start and limit - 1 are in the same group, then enumerate
281                    // only in that one
282                    return iterateSingleGroup(result, limit);
283                }
284                // enumerate characters in the partial start group
285                // if (m_name_.getGroupOffset(m_current_) != 0) {
286                if (!iterateSingleGroup(result,
287                                        UCharacterName.getGroupLimit(gMSB))) {
288                    return false;
289                }
290                ++ m_groupIndex_; // continue with the next group
291            }
292            else if (startMSB > gMSB) {
293                    // make sure that we start enumerating with the first group
294                    // after start
295                    m_groupIndex_ ++;
296            }
297            else {
298                int gMIN = UCharacterName.getGroupMin(gMSB);
299                if (gMIN > limit) {
300                    gMIN = limit;
301                }
302                if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
303                    if (!iterateExtended(result, gMIN)) {
304                        return false;
305                    }
306                }
307                m_current_ = gMIN;
308            }
309        }
310
311        return true;
312    }
313
314    /**
315     * Iterate extended names.
316     * @param result stores the result codepoint and name
317     * @param limit last codepoint + 1 in range to search
318     * @return false if a codepoint with a name is found and we can
319     *         bail from further iteration, true to continue on with the
320     *         iteration (this will always be false for valid codepoints)
321     */
322    private boolean iterateExtended(ValueIterator.Element result,
323                                    int limit)
324    {
325        while (m_current_ < limit) {
326            String name = m_name_.getExtendedOr10Name(m_current_);
327            if (name != null && name.length() > 0) {
328                result.integer = m_current_;
329                result.value   = name;
330                return false;
331            }
332            ++ m_current_;
333        }
334        return true;
335    }
336}
337