1/* GENERATED SOURCE. DO NOT MODIFY. */
2/**
3*******************************************************************************
4* Copyright (C) 1996-2016, International Business Machines Corporation and    *
5* others. All Rights Reserved.                                                *
6*******************************************************************************
7*/
8
9package android.icu.util;
10
11import java.util.Enumeration;
12import java.util.NoSuchElementException;
13
14import android.icu.text.UTF16;
15import android.icu.text.UnicodeSet;
16
17/**
 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.util.StringTokenizer}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
19 *
20 * <p>The string tokenizer class allows an application to break a string
21 * into tokens by performing code point comparison.
22 * The <code>StringTokenizer</code> methods do not distinguish
23 * among identifiers, numbers, and quoted strings, nor do they recognize
24 * and skip comments.
25 * <p>
26 * The set of delimiters (the codepoints that separate tokens) may be
27 * specified either at creation time or on a per-token basis.
28 * <p>
29 * An instance of <code>StringTokenizer</code> behaves in one of three ways,
30 * depending on whether it was created with the <code>returnDelims</code>
31 * and <code>coalesceDelims</code>
32 * flags having the value <code>true</code> or <code>false</code>:
33 * <ul>
34 * <li>If returnDelims is <code>false</code>, delimiter code points serve to
35 * separate tokens. A token is a maximal sequence of consecutive
36 * code points that are not delimiters.
37 * <li>If returnDelims is <code>true</code>, delimiter code points are
38 * themselves considered to be tokens. In this case, if coalesceDelims is
39 * <code>true</code>, such tokens will be the maximal sequence of consecutive
40 * code points that <em>are</em> delimiters.  If coalesceDelims is false,
41 * a token will be received for each delimiter code point.
42 * </ul>
43 * <p>A token is thus either one
44 * delimiter code point, a maximal sequence of consecutive code points that
45 * are delimiters, or a maximal sequence of consecutive code
46 * points that are not delimiters.
47 * <p>
48 * A <tt>StringTokenizer</tt> object internally maintains a current
49 * position within the string to be tokenized. Some operations advance this
50 * current position past the code point processed.
51 * <p>
52 * A token is returned by taking a substring of the string that was used to
53 * create the <tt>StringTokenizer</tt> object.
54 * <p>
55 * Example of the use of the default delimiter tokenizer.
56 * <blockquote><pre>
57 * StringTokenizer st = new StringTokenizer("this is a test");
58 * while (st.hasMoreTokens()) {
59 *     println(st.nextToken());
 * }
61 * </pre></blockquote>
62 * <p>
63 * prints the following output:
64 * <blockquote><pre>
65 *     this
66 *     is
67 *     a
68 *     test
69 * </pre></blockquote>
70 * <p>
71 * Example of the use of the tokenizer with user specified delimiter.
72 * <blockquote><pre>
73 *     StringTokenizer st = new StringTokenizer(
74 *     "this is a test with supplementary characters &#92;ud800&#92;ud800&#92;udc00&#92;udc00",
75 *         " &#92;ud800&#92;udc00");
76 *     while (st.hasMoreTokens()) {
77 *         println(st.nextToken());
78 *     }
79 * </pre></blockquote>
80 * <p>
81 * prints the following output:
82 * <blockquote><pre>
83 *     this
84 *     is
85 *     a
86 *     test
87 *     with
88 *     supplementary
89 *     characters
90 *     &#92;ud800
91 *     &#92;udc00
92 * </pre></blockquote>
93 *
94 * @author syn wee
95 * @hide Only a subset of ICU is exposed in Android
96 */
97public final class StringTokenizer implements Enumeration<Object>
98{
99    // public constructors ---------------------------------------------
100
101    /**
102     * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. All
103     * characters in the delim argument are the delimiters for separating
104     * tokens.
105     * <p>If the returnDelims flag is false, the delimiter characters are
106     * skipped and only serve as separators between tokens.
107     * <p>If the returnDelims flag is true, then the delimiter characters
108     * are also returned as tokens, one per delimiter.
109     * @param str a string to be parsed.
110     * @param delim the delimiters.
111     * @param returndelims flag indicating whether to return the delimiters
112     *        as tokens.
113     * @exception NullPointerException if str is null
114     */
115    public StringTokenizer(String str, UnicodeSet delim, boolean returndelims)
116    {
117        this(str, delim, returndelims, false);
118    }
119
120    /**
121     * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. All
122     * characters in the delim argument are the delimiters for separating
123     * tokens.
124     * <p>If the returnDelims flag is false, the delimiter characters are
125     * skipped and only serve as separators between tokens.
126     * <p>If the returnDelims flag is true, then the delimiter characters
127     * are also returned as tokens.  If coalescedelims is true, one token
128     * is returned for each run of delimiter characters, otherwise one
129     * token is returned per delimiter.  Since surrogate pairs can be
130     * delimiters, the returned token might be two chars in length.
131     * @param str a string to be parsed.
132     * @param delim the delimiters.
133     * @param returndelims flag indicating whether to return the delimiters
134     *        as tokens.
135     * @param coalescedelims flag indicating whether to return a run of
136     *        delimiters as a single token or as one token per delimiter.
137     *        This only takes effect if returndelims is true.
138     * @exception NullPointerException if str is null
139     * @deprecated This API is ICU internal only.
140     * @hide draft / provisional / internal are hidden on Android
141     */
142    @Deprecated
143    public StringTokenizer(String str, UnicodeSet delim, boolean returndelims, boolean coalescedelims)
144    {
145        m_source_ = str;
146        m_length_ = str.length();
147        if (delim == null) {
148            m_delimiters_ = EMPTY_DELIMITER_;
149        }
150        else {
151            m_delimiters_ = delim;
152        }
153        m_returnDelimiters_ = returndelims;
154        m_coalesceDelimiters_ = coalescedelims;
155        m_tokenOffset_ = -1;
156        m_tokenSize_ = -1;
157        if (m_length_ == 0) {
158            // string length 0, no tokens
159            m_nextOffset_ = -1;
160        }
161        else {
162            m_nextOffset_ = 0;
163            if (!returndelims) {
164                m_nextOffset_ = getNextNonDelimiter(0);
165            }
166        }
167    }
168
169    /**
170     * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. The
171     * characters in the delim argument are the delimiters for separating
172     * tokens.
173     * <p>Delimiter characters themselves will not be treated as tokens.
174     * @param str a string to be parsed.
175     * @param delim the delimiters.
176     * @exception NullPointerException if str is null
177     */
178    public StringTokenizer(String str, UnicodeSet delim)
179    {
180        this(str, delim, false, false);
181    }
182
183    /**
184     * <p>Constructs a string tokenizer for the specified string. All
185     * characters in the delim argument are the delimiters for separating
186     * tokens.
187     * <p>If the returnDelims flag is false, the delimiter characters are
188     * skipped and only serve as separators between tokens.
189     * <p>If the returnDelims flag is true, then the delimiter characters
190     * are also returned as tokens, one per delimiter.
191     * @param str a string to be parsed.
192     * @param delim the delimiters.
193     * @param returndelims flag indicating whether to return the delimiters
194     *        as tokens.
195     * @exception NullPointerException if str is null
196     */
197    public StringTokenizer(String str, String delim, boolean returndelims)
198    {
199        this(str, delim, returndelims, false); // java default behavior
200    }
201
202    /**
203     * <p>Constructs a string tokenizer for the specified string. All
204     * characters in the delim argument are the delimiters for separating
205     * tokens.
206     * <p>If the returnDelims flag is false, the delimiter characters are
207     * skipped and only serve as separators between tokens.
208     * <p>If the returnDelims flag is true, then the delimiter characters
209     * are also returned as tokens.  If coalescedelims is true, one token
210     * is returned for each run of delimiter characters, otherwise one
211     * token is returned per delimiter.  Since surrogate pairs can be
212     * delimiters, the returned token might be two chars in length.
213     * @param str a string to be parsed.
214     * @param delim the delimiters.
215     * @param returndelims flag indicating whether to return the delimiters
216     *        as tokens.
217     * @param coalescedelims flag indicating whether to return a run of
218     *        delimiters as a single token or as one token per delimiter.
219     *        This only takes effect if returndelims is true.
220     * @exception NullPointerException if str is null
221     * @deprecated This API is ICU internal only.
222     * @hide draft / provisional / internal are hidden on Android
223     */
224    @Deprecated
225    public StringTokenizer(String str, String delim, boolean returndelims, boolean coalescedelims)
226    {
227        // don't ignore whitespace
228        m_delimiters_ = EMPTY_DELIMITER_;
229        if (delim != null && delim.length() > 0) {
230            m_delimiters_ = new UnicodeSet();
231            m_delimiters_.addAll(delim);
232            checkDelimiters();
233        }
234        m_coalesceDelimiters_ = coalescedelims;
235        m_source_ = str;
236        m_length_ = str.length();
237        m_returnDelimiters_ = returndelims;
238        m_tokenOffset_ = -1;
239        m_tokenSize_ = -1;
240        if (m_length_ == 0) {
241            // string length 0, no tokens
242            m_nextOffset_ = -1;
243        }
244        else {
245            m_nextOffset_ = 0;
246            if (!returndelims) {
247                m_nextOffset_ = getNextNonDelimiter(0);
248            }
249        }
250    }
251
252    /**
253     * <p>Constructs a string tokenizer for the specified string. The
254     * characters in the delim argument are the delimiters for separating
255     * tokens.
256     * <p>Delimiter characters themselves will not be treated as tokens.
257     * @param str a string to be parsed.
258     * @param delim the delimiters.
259     * @exception NullPointerException if str is null
260     */
261    public StringTokenizer(String str, String delim)
262    {
263        // don't ignore whitespace
264        this(str, delim, false, false);
265    }
266
267    /**
268     * <p>Constructs a string tokenizer for the specified string.
269     * The tokenizer uses the default delimiter set, which is
270     * " &#92;t&#92;n&#92;r&#92;f":
271     * the space character, the tab character, the newline character, the
272     * carriage-return character, and the form-feed character.
273     * <p>Delimiter characters themselves will not be treated as tokens.
274     * @param str a string to be parsed
275     * @exception NullPointerException if str is null
276     */
277    public StringTokenizer(String str)
278    {
279        this(str, DEFAULT_DELIMITERS_, false, false);
280    }
281
282    // public methods --------------------------------------------------
283
284    /**
285     * Tests if there are more tokens available from this tokenizer's
286     * string.
287     * If this method returns <tt>true</tt>, then a subsequent call to
288     * <tt>nextToken</tt> with no argument will successfully return a token.
289     * @return <code>true</code> if and only if there is at least one token
290     *         in the string after the current position; <code>false</code>
291     *         otherwise.
292     */
293    public boolean hasMoreTokens()
294    {
295        return m_nextOffset_ >= 0;
296    }
297
298    /**
299     * Returns the next token from this string tokenizer.
300     * @return the next token from this string tokenizer.
301     * @exception NoSuchElementException if there are no more tokens in
302     *            this tokenizer's string.
303     */
304    public String nextToken()
305    {
306        if (m_tokenOffset_ < 0) {
307            if (m_nextOffset_ < 0) {
308                throw new NoSuchElementException("No more tokens in String");
309            }
310            // pre-calculations of tokens not done
311            if (m_returnDelimiters_) {
312                int tokenlimit = 0;
313                int c = UTF16.charAt(m_source_, m_nextOffset_);
314                boolean contains = delims == null
315                    ? m_delimiters_.contains(c)
316                    : c < delims.length && delims[c];
317                if (contains) {
318                     if (m_coalesceDelimiters_) {
319                        tokenlimit = getNextNonDelimiter(m_nextOffset_);
320                     } else {
321                        tokenlimit = m_nextOffset_ + UTF16.getCharCount(c);
322                        if (tokenlimit == m_length_) {
323                            tokenlimit = -1;
324                        }
325                     }
326                }
327                else {
328                    tokenlimit = getNextDelimiter(m_nextOffset_);
329                }
330                String result;
331                if (tokenlimit < 0) {
332                    result = m_source_.substring(m_nextOffset_);
333                }
334                else {
335                    result = m_source_.substring(m_nextOffset_, tokenlimit);
336                }
337                m_nextOffset_ = tokenlimit;
338                return result;
339            }
340            else {
341                int tokenlimit = getNextDelimiter(m_nextOffset_);
342                String result;
343                if (tokenlimit < 0) {
344                    result = m_source_.substring(m_nextOffset_);
345                    m_nextOffset_ = tokenlimit;
346                }
347                else {
348                    result = m_source_.substring(m_nextOffset_, tokenlimit);
349                    m_nextOffset_ = getNextNonDelimiter(tokenlimit);
350                }
351
352                return result;
353            }
354        }
355        // count was called before and we have all the tokens
356        if (m_tokenOffset_ >= m_tokenSize_) {
357            throw new NoSuchElementException("No more tokens in String");
358        }
359        String result;
360        if (m_tokenLimit_[m_tokenOffset_] >= 0) {
361            result = m_source_.substring(m_tokenStart_[m_tokenOffset_],
362                                         m_tokenLimit_[m_tokenOffset_]);
363        }
364        else {
365            result = m_source_.substring(m_tokenStart_[m_tokenOffset_]);
366        }
367        m_tokenOffset_ ++;
368        m_nextOffset_ = -1;
369        if (m_tokenOffset_ < m_tokenSize_) {
370            m_nextOffset_ = m_tokenStart_[m_tokenOffset_];
371        }
372        return result;
373    }
374
375    /**
376     * Returns the next token in this string tokenizer's string. First,
377     * the set of characters considered to be delimiters by this
378     * <tt>StringTokenizer</tt> object is changed to be the characters in
379     * the string <tt>delim</tt>. Then the next token in the string
380     * after the current position is returned. The current position is
381     * advanced beyond the recognized token.  The new delimiter set
382     * remains the default after this call.
383     * @param delim the new delimiters.
384     * @return the next token, after switching to the new delimiter set.
385     * @exception NoSuchElementException if there are no more tokens in
386     *            this tokenizer's string.
387     */
388    public String nextToken(String delim)
389    {
390        m_delimiters_ = EMPTY_DELIMITER_;
391        if (delim != null && delim.length() > 0) {
392            m_delimiters_ = new UnicodeSet();
393            m_delimiters_.addAll(delim);
394        }
395        return nextToken(m_delimiters_);
396    }
397
398    /**
399     * <strong>[icu]</strong> Returns the next token in this string tokenizer's string. First,
400     * the set of characters considered to be delimiters by this
401     * <tt>StringTokenizer</tt> object is changed to be the characters in
402     * the string <tt>delim</tt>. Then the next token in the string
403     * after the current position is returned. The current position is
404     * advanced beyond the recognized token.  The new delimiter set
405     * remains the default after this call.
406     * @param delim the new delimiters.
407     * @return the next token, after switching to the new delimiter set.
408     * @exception NoSuchElementException if there are no more tokens in
409     *            this tokenizer's string.
410     */
411    public String nextToken(UnicodeSet delim)
412    {
413        m_delimiters_ = delim;
414        checkDelimiters();
415        m_tokenOffset_ = -1;
416        m_tokenSize_ = -1;
417        if (!m_returnDelimiters_) {
418            m_nextOffset_ = getNextNonDelimiter(m_nextOffset_);
419        }
420        return nextToken();
421    }
422
423    /**
424     * Returns the same value as the <code>hasMoreTokens</code> method.
425     * It exists so that this class can implement the
426     * <code>Enumeration</code> interface.
427     * @return <code>true</code> if there are more tokens;
428     *         <code>false</code> otherwise.
429     * @see #hasMoreTokens()
430     */
431    public boolean hasMoreElements()
432    {
433        return hasMoreTokens();
434    }
435
436    /**
437     * Returns the same value as the <code>nextToken</code> method, except
438     * that its declared return value is <code>Object</code> rather than
439     * <code>String</code>. It exists so that this class can implement the
440     * <code>Enumeration</code> interface.
441     * @return the next token in the string.
442     * @exception NoSuchElementException if there are no more tokens in
443     *            this tokenizer's string.
444     * @see #nextToken()
445     */
446    public Object nextElement()
447    {
448        return nextToken();
449    }
450
451    /**
452     * Calculates the number of times that this tokenizer's
453     * <code>nextToken</code> method can be called before it generates an
454     * exception. The current position is not advanced.
455     * @return the number of tokens remaining in the string using the
456     *         current delimiter set.
457     * @see #nextToken()
458     */
459    public int countTokens()
460    {
461        int result = 0;
462        if (hasMoreTokens()) {
463            if (m_tokenOffset_ >= 0) {
464                return m_tokenSize_ - m_tokenOffset_;
465            }
466            if (m_tokenStart_ == null) {
467                m_tokenStart_ = new int[TOKEN_SIZE_];
468                m_tokenLimit_ = new int[TOKEN_SIZE_];
469            }
470            do {
471                if (m_tokenStart_.length == result) {
472                    int temptokenindex[] = m_tokenStart_;
473                    int temptokensize[] = m_tokenLimit_;
474                    int originalsize = temptokenindex.length;
475                    int newsize = originalsize + TOKEN_SIZE_;
476                    m_tokenStart_ = new int[newsize];
477                    m_tokenLimit_ = new int[newsize];
478                    System.arraycopy(temptokenindex, 0, m_tokenStart_, 0,
479                                     originalsize);
480                    System.arraycopy(temptokensize, 0, m_tokenLimit_, 0,
481                                     originalsize);
482                }
483                m_tokenStart_[result] = m_nextOffset_;
484                if (m_returnDelimiters_) {
485                    int c = UTF16.charAt(m_source_, m_nextOffset_);
486                    boolean contains = delims == null
487                        ? m_delimiters_.contains(c)
488                        : c < delims.length && delims[c];
489                    if (contains) {
490                        if (m_coalesceDelimiters_) {
491                            m_tokenLimit_[result] = getNextNonDelimiter(
492                                                                m_nextOffset_);
493                        } else {
494                            int p = m_nextOffset_ + 1;
495                            if (p == m_length_) {
496                                p = -1;
497                            }
498                            m_tokenLimit_[result] = p;
499
500                        }
501                    }
502                    else {
503                        m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_);
504                    }
505                    m_nextOffset_ = m_tokenLimit_[result];
506                }
507                else {
508                    m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_);
509                    m_nextOffset_ = getNextNonDelimiter(m_tokenLimit_[result]);
510                }
511                result ++;
512            } while (m_nextOffset_ >= 0);
513            m_tokenOffset_ = 0;
514            m_tokenSize_ = result;
515            m_nextOffset_ = m_tokenStart_[0];
516        }
517        return result;
518    }
519
520    // private data members -------------------------------------------------
521
522    /**
523     * Current offset to the token array. If the array token is not set up yet,
524     * this value is a -1
525     */
526    private int m_tokenOffset_;
527    /**
528     * Size of the token array. If the array token is not set up yet,
529     * this value is a -1
530     */
531    private int m_tokenSize_;
532    /**
533     * Array of pre-calculated tokens start indexes in source string terminated
534     * by -1.
535     * This is only set up during countTokens() and only stores the remaining
536     * tokens, not all tokens including parsed ones
537     */
538    private int m_tokenStart_[];
539    /**
540     * Array of pre-calculated tokens limit indexes in source string.
541     * This is only set up during countTokens() and only stores the remaining
542     * tokens, not all tokens including parsed ones
543     */
544    private int m_tokenLimit_[];
545    /**
546     * UnicodeSet containing delimiters
547     */
548    private UnicodeSet m_delimiters_;
549    /**
550     * String to parse for tokens
551     */
552    private String m_source_;
553    /**
554     * Length of m_source_
555     */
556    private int m_length_;
557    /**
558     * Current position in string to parse for tokens
559     */
560    private int m_nextOffset_;
561    /**
562     * Flag indicator if delimiters are to be treated as tokens too
563     */
564    private boolean m_returnDelimiters_;
565
566    /**
567     * Flag indicating whether to coalesce runs of delimiters into single tokens
568     */
569    private boolean m_coalesceDelimiters_;
570
571    /**
572     * Default set of delimiters &#92;t&#92;n&#92;r&#92;f
573     */
574    private static final UnicodeSet DEFAULT_DELIMITERS_
575        = new UnicodeSet(0x09, 0x0a, 0x0c, 0x0d, 0x20, 0x20);   // UnicodeSet("[ \t\n\r\f]", false)
576    /**
577     * Array size increments
578     */
579    private static final int TOKEN_SIZE_ = 100;
580    /**
581     * A empty delimiter UnicodeSet, used when user specified null delimiters
582     */
583    private static final UnicodeSet EMPTY_DELIMITER_ = UnicodeSet.EMPTY;
584
585    // private methods ------------------------------------------------------
586
587    /**
588     * Gets the index of the next delimiter after offset
589     * @param offset to the source string
590     * @return offset of the immediate next delimiter, otherwise
591     *         (- source string length - 1) if there
592     *         are no more delimiters after m_nextOffset
593     */
594    private int getNextDelimiter(int offset)
595    {
596        if (offset >= 0) {
597            int result = offset;
598            int c = 0;
599            if (delims == null) {
600                do {
601                    c = UTF16.charAt(m_source_, result);
602                    if (m_delimiters_.contains(c)) {
603                        break;
604                    }
605                    result ++;
606                } while (result < m_length_);
607            } else {
608                do {
609                    c = UTF16.charAt(m_source_, result);
610                    if (c < delims.length && delims[c]) {
611                        break;
612                    }
613                    result ++;
614                } while (result < m_length_);
615            }
616            if (result < m_length_) {
617                return result;
618            }
619        }
620        return -1 - m_length_;
621    }
622
623    /**
624     * Gets the index of the next non-delimiter after m_nextOffset_
625     * @param offset to the source string
626     * @return offset of the immediate next non-delimiter, otherwise
627     *         (- source string length - 1) if there
628     *         are no more delimiters after m_nextOffset
629     */
630    private int getNextNonDelimiter(int offset)
631    {
632        if (offset >= 0) {
633            int result = offset;
634            int c = 0;
635            if (delims == null) {
636                do {
637                    c = UTF16.charAt(m_source_, result);
638                    if (!m_delimiters_.contains(c)) {
639                        break;
640                    }
641                    result ++;
642                } while (result < m_length_);
643            } else {
644                do {
645                    c = UTF16.charAt(m_source_, result);
646                    if (!(c < delims.length && delims[c])) {
647                        break;
648                    }
649                    result ++;
650                } while (result < m_length_);
651            }
652            if (result < m_length_) {
653                return result;
654            }
655        }
656        return -1 - m_length_;
657    }
658
659    void checkDelimiters() {
660        if (m_delimiters_ == null || m_delimiters_.size() == 0) {
661            delims = new boolean[0];
662        } else {
663            int maxChar = m_delimiters_.getRangeEnd(m_delimiters_.getRangeCount()-1);
664            if (maxChar < 0x7f) {
665                delims = new boolean[maxChar+1];
666                for (int i = 0, ch; -1 != (ch = m_delimiters_.charAt(i)); ++i) {
667                    delims[ch] = true;
668                }
669            } else {
670                delims = null;
671            }
672        }
673    }
674    private boolean[] delims;
675}
676