// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2002-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
*/
9#ifndef USETITER_H
10#define USETITER_H
11
12#include "unicode/utypes.h"
13#include "unicode/uobject.h"
14#include "unicode/unistr.h"
15
16/**
17 * \file
18 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
19 */
20
21U_NAMESPACE_BEGIN
22
23class UnicodeSet;
24class UnicodeString;
25
26/**
27 *
28 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
29 * iterates over either code points or code point ranges.  After all
30 * code points or ranges have been returned, it returns the
31 * multicharacter strings of the UnicodeSet, if any.
32 *
33 * This class is not intended to be subclassed.  Consider any fields
34 *  or methods declared as "protected" to be private.  The use of
35 *  protected in this class is an artifact of history.
36 *
37 * <p>To iterate over code points and strings, use a loop like this:
38 * <pre>
39 * UnicodeSetIterator it(set);
40 * while (it.next()) {
41 *     processItem(it.getString());
42 * }
43 * </pre>
44 * <p>Each item in the set is accessed as a string.  Set elements
45 *    consisting of single code points are returned as strings containing
46 *    just the one code point.
47 *
48 * <p>To iterate over code point ranges, instead of individual code points,
49 *    use a loop like this:
50 * <pre>
51 * UnicodeSetIterator it(set);
52 * while (it.nextRange()) {
53 *   if (it.isString()) {
54 *     processString(it.getString());
55 *   } else {
56 *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
57 *   }
58 * }
59 * </pre>
60 * @author M. Davis
61 * @stable ICU 2.4
62 */
63class U_COMMON_API UnicodeSetIterator : public UObject {
64
65 protected:
66
67    /**
68     * Value of <tt>codepoint</tt> if the iterator points to a string.
69     * If <tt>codepoint == IS_STRING</tt>, then examine
70     * <tt>string</tt> for the current iteration result.
71     * @stable ICU 2.4
72     */
73    enum { IS_STRING = -1 };
74
75    /**
76     * Current code point, or the special value <tt>IS_STRING</tt>, if
77     * the iterator points to a string.
78     * @stable ICU 2.4
79     */
80    UChar32 codepoint;
81
82    /**
83     * When iterating over ranges using <tt>nextRange()</tt>,
84     * <tt>codepointEnd</tt> contains the inclusive end of the
85     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
86     * iterating over code points using <tt>next()</tt>, or if
87     * <tt>codepoint == IS_STRING</tt>, then the value of
88     * <tt>codepointEnd</tt> is undefined.
89     * @stable ICU 2.4
90     */
91    UChar32 codepointEnd;
92
93    /**
94     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
95     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
96     * value of <tt>string</tt> is undefined.
97     * @stable ICU 2.4
98     */
99    const UnicodeString* string;
100
101 public:
102
103    /**
104     * Create an iterator over the given set.  The iterator is valid
105     * only so long as <tt>set</tt> is valid.
106     * @param set set to iterate over
107     * @stable ICU 2.4
108     */
109    UnicodeSetIterator(const UnicodeSet& set);
110
111    /**
112     * Create an iterator over nothing.  <tt>next()</tt> and
113     * <tt>nextRange()</tt> return false. This is a convenience
114     * constructor allowing the target to be set later.
115     * @stable ICU 2.4
116     */
117    UnicodeSetIterator();
118
119    /**
120     * Destructor.
121     * @stable ICU 2.4
122     */
123    virtual ~UnicodeSetIterator();
124
125    /**
126     * Returns true if the current element is a string.  If so, the
127     * caller can retrieve it with <tt>getString()</tt>.  If this
128     * method returns false, the current element is a code point or
129     * code point range, depending on whether <tt>next()</tt> or
130     * <tt>nextRange()</tt> was called.
131     * Elements of types string and codepoint can both be retrieved
132     * with the function <tt>getString()</tt>.
133     * Elements of type codepoint can also be retrieved with
134     * <tt>getCodepoint()</tt>.
135     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
136     * of the range, and <tt>getCodepointEnd()</tt> returns the end
137     * of the range.
138     * @stable ICU 2.4
139     */
140    inline UBool isString() const;
141
142    /**
143     * Returns the current code point, if <tt>isString()</tt> returned
144     * false.  Otherwise returns an undefined result.
145     * @stable ICU 2.4
146     */
147    inline UChar32 getCodepoint() const;
148
149    /**
150     * Returns the end of the current code point range, if
151     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
152     * called.  Otherwise returns an undefined result.
153     * @stable ICU 2.4
154     */
155    inline UChar32 getCodepointEnd() const;
156
157    /**
158     * Returns the current string, if <tt>isString()</tt> returned
159     * true.  If the current iteration item is a code point, a UnicodeString
160     * containing that single code point is returned.
161     *
162     * Ownership of the returned string remains with the iterator.
163     * The string is guaranteed to remain valid only until the iterator is
164     *   advanced to the next item, or until the iterator is deleted.
165     *
166     * @stable ICU 2.4
167     */
168    const UnicodeString& getString();
169
170    /**
171     * Advances the iteration position to the next element in the set,
172     * which can be either a single code point or a string.
173     * If there are no more elements in the set, return false.
174     *
175     * <p>
176     * If <tt>isString() == TRUE</tt>, the value is a
177     * string, otherwise the value is a
178     * single code point.  Elements of either type can be retrieved
179     * with the function <tt>getString()</tt>, while elements of
180     * consisting of a single code point can be retrieved with
181     * <tt>getCodepoint()</tt>
182     *
183     * <p>The order of iteration is all code points in sorted order,
184     * followed by all strings sorted order.    Do not mix
185     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
186     * calling <tt>reset()</tt> between them.  The results of doing so
187     * are undefined.
188     *
189     * @return true if there was another element in the set.
190     * @stable ICU 2.4
191     */
192    UBool next();
193
194    /**
195     * Returns the next element in the set, either a code point range
196     * or a string.  If there are no more elements in the set, return
197     * false.  If <tt>isString() == TRUE</tt>, the value is a
198     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
199     * range of one or more code points from <tt>getCodepoint()</tt> to
200     * <tt>getCodepointeEnd()</tt> inclusive.
201     *
202     * <p>The order of iteration is all code points ranges in sorted
203     * order, followed by all strings sorted order.  Ranges are
204     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
205     * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
206     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
207     * <tt>reset()</tt> between them.  The results of doing so are
208     * undefined.
209     *
210     * @return true if there was another element in the set.
211     * @stable ICU 2.4
212     */
213    UBool nextRange();
214
215    /**
216     * Sets this iterator to visit the elements of the given set and
217     * resets it to the start of that set.  The iterator is valid only
218     * so long as <tt>set</tt> is valid.
219     * @param set the set to iterate over.
220     * @stable ICU 2.4
221     */
222    void reset(const UnicodeSet& set);
223
224    /**
225     * Resets this iterator to the start of the set.
226     * @stable ICU 2.4
227     */
228    void reset();
229
230    /**
231     * ICU "poor man's RTTI", returns a UClassID for this class.
232     *
233     * @stable ICU 2.4
234     */
235    static UClassID U_EXPORT2 getStaticClassID();
236
237    /**
238     * ICU "poor man's RTTI", returns a UClassID for the actual class.
239     *
240     * @stable ICU 2.4
241     */
242    virtual UClassID getDynamicClassID() const;
243
244    // ======================= PRIVATES ===========================
245
246 protected:
247
248    // endElement and nextElements are really UChar32's, but we keep
249    // them as signed int32_t's so we can do comparisons with
250    // endElement set to -1.  Leave them as int32_t's.
251    /** The set
252     * @stable ICU 2.4
253     */
254    const UnicodeSet* set;
255    /** End range
256     * @stable ICU 2.4
257     */
258    int32_t endRange;
259    /** Range
260     * @stable ICU 2.4
261     */
262    int32_t range;
263    /** End element
264     * @stable ICU 2.4
265     */
266    int32_t endElement;
267    /** Next element
268     * @stable ICU 2.4
269     */
270    int32_t nextElement;
271    //UBool abbreviated;
272    /** Next string
273     * @stable ICU 2.4
274     */
275    int32_t nextString;
276    /** String count
277     * @stable ICU 2.4
278     */
279    int32_t stringCount;
280
281    /**
282     *  Points to the string to use when the caller asks for a
283     *  string and the current iteration item is a code point, not a string.
284     *  @internal
285     */
286    UnicodeString *cpString;
287
288    /** Copy constructor. Disallowed.
289     * @stable ICU 2.4
290     */
291    UnicodeSetIterator(const UnicodeSetIterator&); // disallow
292
293    /** Assignment operator. Disallowed.
294     * @stable ICU 2.4
295     */
296    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
297
298    /** Load range
299     * @stable ICU 2.4
300     */
301    virtual void loadRange(int32_t range);
302
303};
304
305inline UBool UnicodeSetIterator::isString() const {
306    return codepoint == (UChar32)IS_STRING;
307}
308
309inline UChar32 UnicodeSetIterator::getCodepoint() const {
310    return codepoint;
311}
312
313inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
314    return codepointEnd;
315}
316
317
318U_NAMESPACE_END
319
320#endif
321