1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/** 3******************************************************************************* 4* Copyright (C) 1996-2016, International Business Machines Corporation and * 5* others. All Rights Reserved. * 6******************************************************************************* 7*/ 8 9package android.icu.util; 10 11import java.util.Enumeration; 12import java.util.NoSuchElementException; 13 14import android.icu.text.UTF16; 15import android.icu.text.UnicodeSet; 16 17/** 18 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.util.Calendar}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 19 * 20 * <p>The string tokenizer class allows an application to break a string 21 * into tokens by performing code point comparison. 22 * The <code>StringTokenizer</code> methods do not distinguish 23 * among identifiers, numbers, and quoted strings, nor do they recognize 24 * and skip comments. 25 * <p> 26 * The set of delimiters (the codepoints that separate tokens) may be 27 * specified either at creation time or on a per-token basis. 28 * <p> 29 * An instance of <code>StringTokenizer</code> behaves in one of three ways, 30 * depending on whether it was created with the <code>returnDelims</code> 31 * and <code>coalesceDelims</code> 32 * flags having the value <code>true</code> or <code>false</code>: 33 * <ul> 34 * <li>If returnDelims is <code>false</code>, delimiter code points serve to 35 * separate tokens. A token is a maximal sequence of consecutive 36 * code points that are not delimiters. 37 * <li>If returnDelims is <code>true</code>, delimiter code points are 38 * themselves considered to be tokens. In this case, if coalesceDelims is 39 * <code>true</code>, such tokens will be the maximal sequence of consecutive 40 * code points that <em>are</em> delimiters. If coalesceDelims is false, 41 * a token will be received for each delimiter code point. 42 * </ul> 43 * <p>A token is thus either one 44 * delimiter code point, a maximal sequence of consecutive code points that 45 * are delimiters, or a maximal sequence of consecutive code 46 * points that are not delimiters. 47 * <p> 48 * A <tt>StringTokenizer</tt> object internally maintains a current 49 * position within the string to be tokenized. Some operations advance this 50 * current position past the code point processed. 51 * <p> 52 * A token is returned by taking a substring of the string that was used to 53 * create the <tt>StringTokenizer</tt> object. 54 * <p> 55 * Example of the use of the default delimiter tokenizer. 56 * <blockquote><pre> 57 * StringTokenizer st = new StringTokenizer("this is a test"); 58 * while (st.hasMoreTokens()) { 59 * println(st.nextToken()); 60 * } 61 * </pre></blockquote> 62 * <p> 63 * prints the following output: 64 * <blockquote><pre> 65 * this 66 * is 67 * a 68 * test 69 * </pre></blockquote> 70 * <p> 71 * Example of the use of the tokenizer with user specified delimiter. 72 * <blockquote><pre> 73 * StringTokenizer st = new StringTokenizer( 74 * "this is a test with supplementary characters \ud800\ud800\udc00\udc00", 75 * " \ud800\udc00"); 76 * while (st.hasMoreTokens()) { 77 * println(st.nextToken()); 78 * } 79 * </pre></blockquote> 80 * <p> 81 * prints the following output: 82 * <blockquote><pre> 83 * this 84 * is 85 * a 86 * test 87 * with 88 * supplementary 89 * characters 90 * \ud800 91 * \udc00 92 * </pre></blockquote> 93 * 94 * @author syn wee 95 * @hide Only a subset of ICU is exposed in Android 96 */ 97public final class StringTokenizer implements Enumeration<Object> 98{ 99 // public constructors --------------------------------------------- 100 101 /** 102 * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. All 103 * characters in the delim argument are the delimiters for separating 104 * tokens. 105 * <p>If the returnDelims flag is false, the delimiter characters are 106 * skipped and only serve as separators between tokens. 107 * <p>If the returnDelims flag is true, then the delimiter characters 108 * are also returned as tokens, one per delimiter. 109 * @param str a string to be parsed. 110 * @param delim the delimiters. 111 * @param returndelims flag indicating whether to return the delimiters 112 * as tokens. 113 * @exception NullPointerException if str is null 114 */ 115 public StringTokenizer(String str, UnicodeSet delim, boolean returndelims) 116 { 117 this(str, delim, returndelims, false); 118 } 119 120 /** 121 * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. All 122 * characters in the delim argument are the delimiters for separating 123 * tokens. 124 * <p>If the returnDelims flag is false, the delimiter characters are 125 * skipped and only serve as separators between tokens. 126 * <p>If the returnDelims flag is true, then the delimiter characters 127 * are also returned as tokens. If coalescedelims is true, one token 128 * is returned for each run of delimiter characters, otherwise one 129 * token is returned per delimiter. Since surrogate pairs can be 130 * delimiters, the returned token might be two chars in length. 131 * @param str a string to be parsed. 132 * @param delim the delimiters. 133 * @param returndelims flag indicating whether to return the delimiters 134 * as tokens. 135 * @param coalescedelims flag indicating whether to return a run of 136 * delimiters as a single token or as one token per delimiter. 137 * This only takes effect if returndelims is true. 138 * @exception NullPointerException if str is null 139 * @deprecated This API is ICU internal only. 140 * @hide draft / provisional / internal are hidden on Android 141 */ 142 @Deprecated 143 public StringTokenizer(String str, UnicodeSet delim, boolean returndelims, boolean coalescedelims) 144 { 145 m_source_ = str; 146 m_length_ = str.length(); 147 if (delim == null) { 148 m_delimiters_ = EMPTY_DELIMITER_; 149 } 150 else { 151 m_delimiters_ = delim; 152 } 153 m_returnDelimiters_ = returndelims; 154 m_coalesceDelimiters_ = coalescedelims; 155 m_tokenOffset_ = -1; 156 m_tokenSize_ = -1; 157 if (m_length_ == 0) { 158 // string length 0, no tokens 159 m_nextOffset_ = -1; 160 } 161 else { 162 m_nextOffset_ = 0; 163 if (!returndelims) { 164 m_nextOffset_ = getNextNonDelimiter(0); 165 } 166 } 167 } 168 169 /** 170 * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. The 171 * characters in the delim argument are the delimiters for separating 172 * tokens. 173 * <p>Delimiter characters themselves will not be treated as tokens. 174 * @param str a string to be parsed. 175 * @param delim the delimiters. 176 * @exception NullPointerException if str is null 177 */ 178 public StringTokenizer(String str, UnicodeSet delim) 179 { 180 this(str, delim, false, false); 181 } 182 183 /** 184 * <p>Constructs a string tokenizer for the specified string. All 185 * characters in the delim argument are the delimiters for separating 186 * tokens. 187 * <p>If the returnDelims flag is false, the delimiter characters are 188 * skipped and only serve as separators between tokens. 189 * <p>If the returnDelims flag is true, then the delimiter characters 190 * are also returned as tokens, one per delimiter. 191 * @param str a string to be parsed. 192 * @param delim the delimiters. 193 * @param returndelims flag indicating whether to return the delimiters 194 * as tokens. 195 * @exception NullPointerException if str is null 196 */ 197 public StringTokenizer(String str, String delim, boolean returndelims) 198 { 199 this(str, delim, returndelims, false); // java default behavior 200 } 201 202 /** 203 * <p>Constructs a string tokenizer for the specified string. All 204 * characters in the delim argument are the delimiters for separating 205 * tokens. 206 * <p>If the returnDelims flag is false, the delimiter characters are 207 * skipped and only serve as separators between tokens. 208 * <p>If the returnDelims flag is true, then the delimiter characters 209 * are also returned as tokens. If coalescedelims is true, one token 210 * is returned for each run of delimiter characters, otherwise one 211 * token is returned per delimiter. Since surrogate pairs can be 212 * delimiters, the returned token might be two chars in length. 213 * @param str a string to be parsed. 214 * @param delim the delimiters. 215 * @param returndelims flag indicating whether to return the delimiters 216 * as tokens. 217 * @param coalescedelims flag indicating whether to return a run of 218 * delimiters as a single token or as one token per delimiter. 219 * This only takes effect if returndelims is true. 220 * @exception NullPointerException if str is null 221 * @deprecated This API is ICU internal only. 222 * @hide draft / provisional / internal are hidden on Android 223 */ 224 @Deprecated 225 public StringTokenizer(String str, String delim, boolean returndelims, boolean coalescedelims) 226 { 227 // don't ignore whitespace 228 m_delimiters_ = EMPTY_DELIMITER_; 229 if (delim != null && delim.length() > 0) { 230 m_delimiters_ = new UnicodeSet(); 231 m_delimiters_.addAll(delim); 232 checkDelimiters(); 233 } 234 m_coalesceDelimiters_ = coalescedelims; 235 m_source_ = str; 236 m_length_ = str.length(); 237 m_returnDelimiters_ = returndelims; 238 m_tokenOffset_ = -1; 239 m_tokenSize_ = -1; 240 if (m_length_ == 0) { 241 // string length 0, no tokens 242 m_nextOffset_ = -1; 243 } 244 else { 245 m_nextOffset_ = 0; 246 if (!returndelims) { 247 m_nextOffset_ = getNextNonDelimiter(0); 248 } 249 } 250 } 251 252 /** 253 * <p>Constructs a string tokenizer for the specified string. The 254 * characters in the delim argument are the delimiters for separating 255 * tokens. 256 * <p>Delimiter characters themselves will not be treated as tokens. 257 * @param str a string to be parsed. 258 * @param delim the delimiters. 259 * @exception NullPointerException if str is null 260 */ 261 public StringTokenizer(String str, String delim) 262 { 263 // don't ignore whitespace 264 this(str, delim, false, false); 265 } 266 267 /** 268 * <p>Constructs a string tokenizer for the specified string. 269 * The tokenizer uses the default delimiter set, which is 270 * " \t\n\r\f": 271 * the space character, the tab character, the newline character, the 272 * carriage-return character, and the form-feed character. 273 * <p>Delimiter characters themselves will not be treated as tokens. 274 * @param str a string to be parsed 275 * @exception NullPointerException if str is null 276 */ 277 public StringTokenizer(String str) 278 { 279 this(str, DEFAULT_DELIMITERS_, false, false); 280 } 281 282 // public methods -------------------------------------------------- 283 284 /** 285 * Tests if there are more tokens available from this tokenizer's 286 * string. 287 * If this method returns <tt>true</tt>, then a subsequent call to 288 * <tt>nextToken</tt> with no argument will successfully return a token. 289 * @return <code>true</code> if and only if there is at least one token 290 * in the string after the current position; <code>false</code> 291 * otherwise. 292 */ 293 public boolean hasMoreTokens() 294 { 295 return m_nextOffset_ >= 0; 296 } 297 298 /** 299 * Returns the next token from this string tokenizer. 300 * @return the next token from this string tokenizer. 301 * @exception NoSuchElementException if there are no more tokens in 302 * this tokenizer's string. 303 */ 304 public String nextToken() 305 { 306 if (m_tokenOffset_ < 0) { 307 if (m_nextOffset_ < 0) { 308 throw new NoSuchElementException("No more tokens in String"); 309 } 310 // pre-calculations of tokens not done 311 if (m_returnDelimiters_) { 312 int tokenlimit = 0; 313 int c = UTF16.charAt(m_source_, m_nextOffset_); 314 boolean contains = delims == null 315 ? m_delimiters_.contains(c) 316 : c < delims.length && delims[c]; 317 if (contains) { 318 if (m_coalesceDelimiters_) { 319 tokenlimit = getNextNonDelimiter(m_nextOffset_); 320 } else { 321 tokenlimit = m_nextOffset_ + UTF16.getCharCount(c); 322 if (tokenlimit == m_length_) { 323 tokenlimit = -1; 324 } 325 } 326 } 327 else { 328 tokenlimit = getNextDelimiter(m_nextOffset_); 329 } 330 String result; 331 if (tokenlimit < 0) { 332 result = m_source_.substring(m_nextOffset_); 333 } 334 else { 335 result = m_source_.substring(m_nextOffset_, tokenlimit); 336 } 337 m_nextOffset_ = tokenlimit; 338 return result; 339 } 340 else { 341 int tokenlimit = getNextDelimiter(m_nextOffset_); 342 String result; 343 if (tokenlimit < 0) { 344 result = m_source_.substring(m_nextOffset_); 345 m_nextOffset_ = tokenlimit; 346 } 347 else { 348 result = m_source_.substring(m_nextOffset_, tokenlimit); 349 m_nextOffset_ = getNextNonDelimiter(tokenlimit); 350 } 351 352 return result; 353 } 354 } 355 // count was called before and we have all the tokens 356 if (m_tokenOffset_ >= m_tokenSize_) { 357 throw new NoSuchElementException("No more tokens in String"); 358 } 359 String result; 360 if (m_tokenLimit_[m_tokenOffset_] >= 0) { 361 result = m_source_.substring(m_tokenStart_[m_tokenOffset_], 362 m_tokenLimit_[m_tokenOffset_]); 363 } 364 else { 365 result = m_source_.substring(m_tokenStart_[m_tokenOffset_]); 366 } 367 m_tokenOffset_ ++; 368 m_nextOffset_ = -1; 369 if (m_tokenOffset_ < m_tokenSize_) { 370 m_nextOffset_ = m_tokenStart_[m_tokenOffset_]; 371 } 372 return result; 373 } 374 375 /** 376 * Returns the next token in this string tokenizer's string. First, 377 * the set of characters considered to be delimiters by this 378 * <tt>StringTokenizer</tt> object is changed to be the characters in 379 * the string <tt>delim</tt>. Then the next token in the string 380 * after the current position is returned. The current position is 381 * advanced beyond the recognized token. The new delimiter set 382 * remains the default after this call. 383 * @param delim the new delimiters. 384 * @return the next token, after switching to the new delimiter set. 385 * @exception NoSuchElementException if there are no more tokens in 386 * this tokenizer's string. 387 */ 388 public String nextToken(String delim) 389 { 390 m_delimiters_ = EMPTY_DELIMITER_; 391 if (delim != null && delim.length() > 0) { 392 m_delimiters_ = new UnicodeSet(); 393 m_delimiters_.addAll(delim); 394 } 395 return nextToken(m_delimiters_); 396 } 397 398 /** 399 * <strong>[icu]</strong> Returns the next token in this string tokenizer's string. First, 400 * the set of characters considered to be delimiters by this 401 * <tt>StringTokenizer</tt> object is changed to be the characters in 402 * the string <tt>delim</tt>. Then the next token in the string 403 * after the current position is returned. The current position is 404 * advanced beyond the recognized token. The new delimiter set 405 * remains the default after this call. 406 * @param delim the new delimiters. 407 * @return the next token, after switching to the new delimiter set. 408 * @exception NoSuchElementException if there are no more tokens in 409 * this tokenizer's string. 410 */ 411 public String nextToken(UnicodeSet delim) 412 { 413 m_delimiters_ = delim; 414 checkDelimiters(); 415 m_tokenOffset_ = -1; 416 m_tokenSize_ = -1; 417 if (!m_returnDelimiters_) { 418 m_nextOffset_ = getNextNonDelimiter(m_nextOffset_); 419 } 420 return nextToken(); 421 } 422 423 /** 424 * Returns the same value as the <code>hasMoreTokens</code> method. 425 * It exists so that this class can implement the 426 * <code>Enumeration</code> interface. 427 * @return <code>true</code> if there are more tokens; 428 * <code>false</code> otherwise. 429 * @see #hasMoreTokens() 430 */ 431 public boolean hasMoreElements() 432 { 433 return hasMoreTokens(); 434 } 435 436 /** 437 * Returns the same value as the <code>nextToken</code> method, except 438 * that its declared return value is <code>Object</code> rather than 439 * <code>String</code>. It exists so that this class can implement the 440 * <code>Enumeration</code> interface. 441 * @return the next token in the string. 442 * @exception NoSuchElementException if there are no more tokens in 443 * this tokenizer's string. 444 * @see #nextToken() 445 */ 446 public Object nextElement() 447 { 448 return nextToken(); 449 } 450 451 /** 452 * Calculates the number of times that this tokenizer's 453 * <code>nextToken</code> method can be called before it generates an 454 * exception. The current position is not advanced. 455 * @return the number of tokens remaining in the string using the 456 * current delimiter set. 457 * @see #nextToken() 458 */ 459 public int countTokens() 460 { 461 int result = 0; 462 if (hasMoreTokens()) { 463 if (m_tokenOffset_ >= 0) { 464 return m_tokenSize_ - m_tokenOffset_; 465 } 466 if (m_tokenStart_ == null) { 467 m_tokenStart_ = new int[TOKEN_SIZE_]; 468 m_tokenLimit_ = new int[TOKEN_SIZE_]; 469 } 470 do { 471 if (m_tokenStart_.length == result) { 472 int temptokenindex[] = m_tokenStart_; 473 int temptokensize[] = m_tokenLimit_; 474 int originalsize = temptokenindex.length; 475 int newsize = originalsize + TOKEN_SIZE_; 476 m_tokenStart_ = new int[newsize]; 477 m_tokenLimit_ = new int[newsize]; 478 System.arraycopy(temptokenindex, 0, m_tokenStart_, 0, 479 originalsize); 480 System.arraycopy(temptokensize, 0, m_tokenLimit_, 0, 481 originalsize); 482 } 483 m_tokenStart_[result] = m_nextOffset_; 484 if (m_returnDelimiters_) { 485 int c = UTF16.charAt(m_source_, m_nextOffset_); 486 boolean contains = delims == null 487 ? m_delimiters_.contains(c) 488 : c < delims.length && delims[c]; 489 if (contains) { 490 if (m_coalesceDelimiters_) { 491 m_tokenLimit_[result] = getNextNonDelimiter( 492 m_nextOffset_); 493 } else { 494 int p = m_nextOffset_ + 1; 495 if (p == m_length_) { 496 p = -1; 497 } 498 m_tokenLimit_[result] = p; 499 500 } 501 } 502 else { 503 m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_); 504 } 505 m_nextOffset_ = m_tokenLimit_[result]; 506 } 507 else { 508 m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_); 509 m_nextOffset_ = getNextNonDelimiter(m_tokenLimit_[result]); 510 } 511 result ++; 512 } while (m_nextOffset_ >= 0); 513 m_tokenOffset_ = 0; 514 m_tokenSize_ = result; 515 m_nextOffset_ = m_tokenStart_[0]; 516 } 517 return result; 518 } 519 520 // private data members ------------------------------------------------- 521 522 /** 523 * Current offset to the token array. If the array token is not set up yet, 524 * this value is a -1 525 */ 526 private int m_tokenOffset_; 527 /** 528 * Size of the token array. If the array token is not set up yet, 529 * this value is a -1 530 */ 531 private int m_tokenSize_; 532 /** 533 * Array of pre-calculated tokens start indexes in source string terminated 534 * by -1. 535 * This is only set up during countTokens() and only stores the remaining 536 * tokens, not all tokens including parsed ones 537 */ 538 private int m_tokenStart_[]; 539 /** 540 * Array of pre-calculated tokens limit indexes in source string. 541 * This is only set up during countTokens() and only stores the remaining 542 * tokens, not all tokens including parsed ones 543 */ 544 private int m_tokenLimit_[]; 545 /** 546 * UnicodeSet containing delimiters 547 */ 548 private UnicodeSet m_delimiters_; 549 /** 550 * String to parse for tokens 551 */ 552 private String m_source_; 553 /** 554 * Length of m_source_ 555 */ 556 private int m_length_; 557 /** 558 * Current position in string to parse for tokens 559 */ 560 private int m_nextOffset_; 561 /** 562 * Flag indicator if delimiters are to be treated as tokens too 563 */ 564 private boolean m_returnDelimiters_; 565 566 /** 567 * Flag indicating whether to coalesce runs of delimiters into single tokens 568 */ 569 private boolean m_coalesceDelimiters_; 570 571 /** 572 * Default set of delimiters \t\n\r\f 573 */ 574 private static final UnicodeSet DEFAULT_DELIMITERS_ 575 = new UnicodeSet(0x09, 0x0a, 0x0c, 0x0d, 0x20, 0x20); // UnicodeSet("[ \t\n\r\f]", false) 576 /** 577 * Array size increments 578 */ 579 private static final int TOKEN_SIZE_ = 100; 580 /** 581 * A empty delimiter UnicodeSet, used when user specified null delimiters 582 */ 583 private static final UnicodeSet EMPTY_DELIMITER_ = UnicodeSet.EMPTY; 584 585 // private methods ------------------------------------------------------ 586 587 /** 588 * Gets the index of the next delimiter after offset 589 * @param offset to the source string 590 * @return offset of the immediate next delimiter, otherwise 591 * (- source string length - 1) if there 592 * are no more delimiters after m_nextOffset 593 */ 594 private int getNextDelimiter(int offset) 595 { 596 if (offset >= 0) { 597 int result = offset; 598 int c = 0; 599 if (delims == null) { 600 do { 601 c = UTF16.charAt(m_source_, result); 602 if (m_delimiters_.contains(c)) { 603 break; 604 } 605 result ++; 606 } while (result < m_length_); 607 } else { 608 do { 609 c = UTF16.charAt(m_source_, result); 610 if (c < delims.length && delims[c]) { 611 break; 612 } 613 result ++; 614 } while (result < m_length_); 615 } 616 if (result < m_length_) { 617 return result; 618 } 619 } 620 return -1 - m_length_; 621 } 622 623 /** 624 * Gets the index of the next non-delimiter after m_nextOffset_ 625 * @param offset to the source string 626 * @return offset of the immediate next non-delimiter, otherwise 627 * (- source string length - 1) if there 628 * are no more delimiters after m_nextOffset 629 */ 630 private int getNextNonDelimiter(int offset) 631 { 632 if (offset >= 0) { 633 int result = offset; 634 int c = 0; 635 if (delims == null) { 636 do { 637 c = UTF16.charAt(m_source_, result); 638 if (!m_delimiters_.contains(c)) { 639 break; 640 } 641 result ++; 642 } while (result < m_length_); 643 } else { 644 do { 645 c = UTF16.charAt(m_source_, result); 646 if (!(c < delims.length && delims[c])) { 647 break; 648 } 649 result ++; 650 } while (result < m_length_); 651 } 652 if (result < m_length_) { 653 return result; 654 } 655 } 656 return -1 - m_length_; 657 } 658 659 void checkDelimiters() { 660 if (m_delimiters_ == null || m_delimiters_.size() == 0) { 661 delims = new boolean[0]; 662 } else { 663 int maxChar = m_delimiters_.getRangeEnd(m_delimiters_.getRangeCount()-1); 664 if (maxChar < 0x7f) { 665 delims = new boolean[maxChar+1]; 666 for (int i = 0, ch; -1 != (ch = m_delimiters_.charAt(i)); ++i) { 667 delims[ch] = true; 668 } 669 } else { 670 delims = null; 671 } 672 } 673 } 674 private boolean[] delims; 675} 676