1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/* 3****************************************************************************** 4* Copyright (C) 1996-2015, International Business Machines Corporation and 5* others. All Rights Reserved. 6****************************************************************************** 7*/ 8 9package android.icu.impl; 10 11import java.util.NoSuchElementException; 12 13import android.icu.lang.UCharacter; 14import android.icu.text.UTF16; 15import android.icu.util.RangeValueIterator; 16 17/** 18 * <p>Class enabling iteration of the values in a Trie.</p> 19 * 20 * <p>2015-sep-03 TODO: Only used in test code, move there. 21 * 22 * <p>Result of each iteration contains the interval of codepoints that have 23 * the same value type and the value type itself.</p> 24 * <p>The comparison of each codepoint value is done via extract(), which the 25 * default implementation is to return the value as it is.</p> 26 * <p>Method extract() can be overwritten to perform manipulations on 27 * codepoint values in order to perform specialized comparison.</p> 28 * <p>TrieIterator is designed to be a generic iterator for the CharTrie 29 * and the IntTrie, hence to accommodate both types of data, the return 30 * result will be in terms of int (32 bit) values.</p> 31 * <p>See android.icu.text.UCharacterTypeIterator for examples of use.</p> 32 * <p>Notes for porting utrie_enum from icu4c to icu4j:<br> 33 * Internally, icu4c's utrie_enum performs all iterations in its body. In Java 34 * sense, the caller will have to pass a object with a callback function 35 * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, 36 * uint32_t value) into utrie_enum. utrie_enum will then find ranges of 37 * codepoints with the same value as determined by 38 * UTrieEnumValue(const void *context, uint32_t value). for each range, 39 * utrie_enum calls the callback function to perform a task. In this way, 40 * icu4c performs the iteration within utrie_enum. 41 * To follow the JDK model, icu4j is slightly different from icu4c. 42 * Instead of requesting the caller to implement an object for a callback. 43 * The caller will have to implement a subclass of TrieIterator, fleshing out 44 * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j, 45 * the caller will have to code his own iteration and flesh out the task 46 * (equivalent to UTrieEnumRange) to be performed in the iteration loop. 47 * </p> 48 * <p>There are basically 3 usage scenarios for porting:</p> 49 * <p>1) UTrieEnumValue is the only implemented callback then just implement a 50 * subclass of TrieIterator and override the extract(int) method. The 51 * extract(int) method is analogus to UTrieEnumValue callback. 52 * </p> 53 * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement 54 * a subclass of TrieIterator, override the extract method and iterate, e.g 55 * </p> 56 * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange, 57 * set);<br> 58 * In Java :<br> 59 * <pre> 60 * class TrieIteratorImpl extends TrieIterator{ 61 * public TrieIteratorImpl(Trie data){ 62 * super(data); 63 * } 64 * public int extract(int value){ 65 * // port the implementation of _enumPropertyStartsValue here 66 * } 67 * } 68 * .... 69 * TrieIterator fcdIter = new TrieIteratorImpl(fcdTrieImpl.fcdTrie); 70 * while(fcdIter.next(result)) { 71 * // port the implementation of _enumPropertyStartsRange 72 * } 73 * </pre> 74 * </p> 75 * <p>3) UTrieEnumRange is the only implemented callback then just implement 76 * the while loop, when utrie_enum is called 77 * <pre> 78 * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set); 79 * TrieIterator fcdIter = new TrieIterator(fcdTrieImpl.fcdTrie); 80 * while(fcdIter.next(result)){ 81 * set.add(result.start); 82 * } 83 * </p> 84 * @author synwee 85 * @see android.icu.impl.Trie 86 * @hide Only a subset of ICU is exposed in Android 87 */ 88public class TrieIterator implements RangeValueIterator 89 90{ 91 // public constructor --------------------------------------------- 92 93 /** 94 * TrieEnumeration constructor 95 * @param trie to be used 96 * @exception IllegalArgumentException throw when argument is null. 97 */ 98 public TrieIterator(Trie trie) 99 { 100 if (trie == null) { 101 throw new IllegalArgumentException( 102 "Argument trie cannot be null"); 103 } 104 m_trie_ = trie; 105 // synwee: check that extract belongs to the child class 106 m_initialValue_ = extract(m_trie_.getInitialValue()); 107 reset(); 108 } 109 110 // public methods ------------------------------------------------- 111 112 /** 113 * <p>Returns true if we are not at the end of the iteration, false 114 * otherwise.</p> 115 * <p>The next set of codepoints with the same value type will be 116 * calculated during this call and returned in the arguement element.</p> 117 * @param element return result 118 * @return true if we are not at the end of the iteration, false otherwise. 119 * @exception NoSuchElementException - if no more elements exist. 120 * @see android.icu.util.RangeValueIterator.Element 121 */ 122 public final boolean next(Element element) 123 { 124 if (m_nextCodepoint_ > UCharacter.MAX_VALUE) { 125 return false; 126 } 127 if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE && 128 calculateNextBMPElement(element)) { 129 return true; 130 } 131 calculateNextSupplementaryElement(element); 132 return true; 133 } 134 135 /** 136 * Resets the iterator to the beginning of the iteration 137 */ 138 public final void reset() 139 { 140 m_currentCodepoint_ = 0; 141 m_nextCodepoint_ = 0; 142 m_nextIndex_ = 0; 143 m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_; 144 if (m_nextBlock_ == m_trie_.m_dataOffset_) { 145 m_nextValue_ = m_initialValue_; 146 } 147 else { 148 m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_)); 149 } 150 m_nextBlockIndex_ = 0; 151 m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_; 152 } 153 154 // protected methods ---------------------------------------------- 155 156 /** 157 * Called by next() to extracts a 32 bit value from a trie value 158 * used for comparison. 159 * This method is to be overwritten if special manipulation is to be done 160 * to retrieve a relevant comparison. 161 * The default function is to return the value as it is. 162 * @param value a value from the trie 163 * @return extracted value 164 */ 165 protected int extract(int value) 166 { 167 return value; 168 } 169 170 // private methods ------------------------------------------------ 171 172 /** 173 * Set the result values 174 * @param element return result object 175 * @param start codepoint of range 176 * @param limit (end + 1) codepoint of range 177 * @param value common value of range 178 */ 179 private final void setResult(Element element, int start, int limit, 180 int value) 181 { 182 element.start = start; 183 element.limit = limit; 184 element.value = value; 185 } 186 187 /** 188 * Finding the next element. 189 * This method is called just before returning the result of 190 * next(). 191 * We always store the next element before it is requested. 192 * In the case that we have to continue calculations into the 193 * supplementary planes, a false will be returned. 194 * @param element return result object 195 * @return true if the next range is found, false if we have to proceed to 196 * the supplementary range. 197 */ 198 private final boolean calculateNextBMPElement(Element element) 199 { 200 int currentValue = m_nextValue_; 201 m_currentCodepoint_ = m_nextCodepoint_; 202 m_nextCodepoint_ ++; 203 m_nextBlockIndex_ ++; 204 if (!checkBlockDetail(currentValue)) { 205 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 206 currentValue); 207 return true; 208 } 209 // synwee check that next block index == 0 here 210 // enumerate BMP - the main loop enumerates data blocks 211 while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) { 212 // because of the way the character is split to form the index 213 // the lead surrogate and trail surrogate can not be in the 214 // mid of a block 215 if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) { 216 // skip lead surrogate code units, 217 // go to lead surrogate codepoints 218 m_nextIndex_ = BMP_INDEX_LENGTH_; 219 } 220 else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) { 221 // go back to regular BMP code points 222 m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_; 223 } else { 224 m_nextIndex_ ++; 225 } 226 227 m_nextBlockIndex_ = 0; 228 if (!checkBlock(currentValue)) { 229 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 230 currentValue); 231 return true; 232 } 233 } 234 m_nextCodepoint_ --; // step one back since this value has not been 235 m_nextBlockIndex_ --; // retrieved yet. 236 return false; 237 } 238 239 /** 240 * Finds the next supplementary element. 241 * For each entry in the trie, the value to be delivered is passed through 242 * extract(). 243 * We always store the next element before it is requested. 244 * Called after calculateNextBMP() completes its round of BMP characters. 245 * There is a slight difference in the usage of m_currentCodepoint_ 246 * here as compared to calculateNextBMP(). Though both represents the 247 * lower bound of the next element, in calculateNextBMP() it gets set 248 * at the start of any loop, where-else, in calculateNextSupplementary() 249 * since m_currentCodepoint_ already contains the lower bound of the 250 * next element (passed down from calculateNextBMP()), we keep it till 251 * the end before resetting it to the new value. 252 * Note, if there are no more iterations, it will never get to here. 253 * Blocked out by next(). 254 * @param element return result object 255 */ 256 private final void calculateNextSupplementaryElement(Element element) 257 { 258 int currentValue = m_nextValue_; 259 m_nextCodepoint_ ++; 260 m_nextBlockIndex_ ++; 261 262 if (UTF16.getTrailSurrogate(m_nextCodepoint_) 263 != UTF16.TRAIL_SURROGATE_MIN_VALUE) { 264 // this piece is only called when we are in the middle of a lead 265 // surrogate block 266 if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) { 267 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 268 currentValue); 269 m_currentCodepoint_ = m_nextCodepoint_; 270 return; 271 } 272 // we have cleared one block 273 m_nextIndex_ ++; 274 m_nextTrailIndexOffset_ ++; 275 if (!checkTrailBlock(currentValue)) { 276 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 277 currentValue); 278 m_currentCodepoint_ = m_nextCodepoint_; 279 return; 280 } 281 } 282 int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); 283 // enumerate supplementary code points 284 while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) { 285 // lead surrogate access 286 final int leadBlock = 287 m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << 288 Trie.INDEX_STAGE_2_SHIFT_; 289 if (leadBlock == m_trie_.m_dataOffset_) { 290 // no entries for a whole block of lead surrogates 291 if (currentValue != m_initialValue_) { 292 m_nextValue_ = m_initialValue_; 293 m_nextBlock_ = leadBlock; // == m_trie_.m_dataOffset_ 294 m_nextBlockIndex_ = 0; 295 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 296 currentValue); 297 m_currentCodepoint_ = m_nextCodepoint_; 298 return; 299 } 300 301 nextLead += DATA_BLOCK_LENGTH_; 302 // number of total affected supplementary codepoints in one 303 // block 304 // this is not a simple addition of 305 // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider 306 // that we might have moved some of the codepoints 307 m_nextCodepoint_ = Character.toCodePoint((char)nextLead, (char)UTF16.TRAIL_SURROGATE_MIN_VALUE); 308 continue; 309 } 310 if (m_trie_.m_dataManipulate_ == null) { 311 throw new NullPointerException( 312 "The field DataManipulate in this Trie is null"); 313 } 314 // enumerate trail surrogates for this lead surrogate 315 m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( 316 m_trie_.getValue(leadBlock + 317 (nextLead & Trie.INDEX_STAGE_3_MASK_))); 318 if (m_nextIndex_ <= 0) { 319 // no data for this lead surrogate 320 if (currentValue != m_initialValue_) { 321 m_nextValue_ = m_initialValue_; 322 m_nextBlock_ = m_trie_.m_dataOffset_; 323 m_nextBlockIndex_ = 0; 324 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 325 currentValue); 326 m_currentCodepoint_ = m_nextCodepoint_; 327 return; 328 } 329 m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_; 330 } else { 331 m_nextTrailIndexOffset_ = 0; 332 if (!checkTrailBlock(currentValue)) { 333 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 334 currentValue); 335 m_currentCodepoint_ = m_nextCodepoint_; 336 return; 337 } 338 } 339 nextLead ++; 340 } 341 342 // deliver last range 343 setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1, 344 currentValue); 345 } 346 347 /** 348 * Internal block value calculations 349 * Performs calculations on a data block to find codepoints in m_nextBlock_ 350 * after the index m_nextBlockIndex_ that has the same value. 351 * Note m_*_ variables at this point is the next codepoint whose value 352 * has not been calculated. 353 * But when returned with false, it will be the last codepoint whose 354 * value has been calculated. 355 * @param currentValue the value which other codepoints are tested against 356 * @return true if the whole block has the same value as currentValue or if 357 * the whole block has been calculated, false otherwise. 358 */ 359 private final boolean checkBlockDetail(int currentValue) 360 { 361 while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) { 362 m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ + 363 m_nextBlockIndex_)); 364 if (m_nextValue_ != currentValue) { 365 return false; 366 } 367 ++ m_nextBlockIndex_; 368 ++ m_nextCodepoint_; 369 } 370 return true; 371 } 372 373 /** 374 * Internal block value calculations 375 * Performs calculations on a data block to find codepoints in m_nextBlock_ 376 * that has the same value. 377 * Will call checkBlockDetail() if highlevel check fails. 378 * Note m_*_ variables at this point is the next codepoint whose value 379 * has not been calculated. 380 * @param currentBlock the initial block containing all currentValue 381 * @param currentValue the value which other codepoints are tested against 382 * @return true if the whole block has the same value as currentValue or if 383 * the whole block has been calculated, false otherwise. 384 */ 385 private final boolean checkBlock(int currentValue) 386 { 387 int currentBlock = m_nextBlock_; 388 m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] << 389 Trie.INDEX_STAGE_2_SHIFT_; 390 if (m_nextBlock_ == currentBlock && 391 (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) { 392 // the block is the same as the previous one, filled with 393 // currentValue 394 m_nextCodepoint_ += DATA_BLOCK_LENGTH_; 395 } 396 else if (m_nextBlock_ == m_trie_.m_dataOffset_) { 397 // this is the all-initial-value block 398 if (currentValue != m_initialValue_) { 399 m_nextValue_ = m_initialValue_; 400 m_nextBlockIndex_ = 0; 401 return false; 402 } 403 m_nextCodepoint_ += DATA_BLOCK_LENGTH_; 404 } 405 else { 406 if (!checkBlockDetail(currentValue)) { 407 return false; 408 } 409 } 410 return true; 411 } 412 413 /** 414 * Internal block value calculations 415 * Performs calculations on multiple data blocks for a set of trail 416 * surrogates to find codepoints in m_nextBlock_ that has the same value. 417 * Will call checkBlock() for internal block checks. 418 * Note m_*_ variables at this point is the next codepoint whose value 419 * has not been calculated. 420 * @param currentValue the value which other codepoints are tested against 421 * @return true if the whole block has the same value as currentValue or if 422 * the whole block has been calculated, false otherwise. 423 */ 424 private final boolean checkTrailBlock(int currentValue) 425 { 426 // enumerate code points for this lead surrogate 427 while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_) 428 { 429 // if we ever reach here, we are at the start of a new block 430 m_nextBlockIndex_ = 0; 431 // copy of most of the body of the BMP loop 432 if (!checkBlock(currentValue)) { 433 return false; 434 } 435 m_nextTrailIndexOffset_ ++; 436 m_nextIndex_ ++; 437 } 438 return true; 439 } 440 441 /** 442 * Checks if we are beginning at the start of a initial block. 443 * If we are then the rest of the codepoints in this initial block 444 * has the same values. 445 * We increment m_nextCodepoint_ and relevant data members if so. 446 * This is used only in for the supplementary codepoints because 447 * the offset to the trail indexes could be 0. 448 * @return true if we are at the start of a initial block. 449 */ 450 private final boolean checkNullNextTrailIndex() 451 { 452 if (m_nextIndex_ <= 0) { 453 m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1; 454 int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); 455 int leadBlock = 456 m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << 457 Trie.INDEX_STAGE_2_SHIFT_; 458 if (m_trie_.m_dataManipulate_ == null) { 459 throw new NullPointerException( 460 "The field DataManipulate in this Trie is null"); 461 } 462 m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( 463 m_trie_.getValue(leadBlock + 464 (nextLead & Trie.INDEX_STAGE_3_MASK_))); 465 m_nextIndex_ --; 466 m_nextBlockIndex_ = DATA_BLOCK_LENGTH_; 467 return true; 468 } 469 return false; 470 } 471 472 // private data members -------------------------------------------- 473 474 /** 475 * Size of the stage 1 BMP indexes 476 */ 477 private static final int BMP_INDEX_LENGTH_ = 478 0x10000 >> Trie.INDEX_STAGE_1_SHIFT_; 479 /** 480 * Lead surrogate minimum value 481 */ 482 private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800; 483 /** 484 * Trail surrogate minimum value 485 */ 486 private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00; 487 /* 488 * Trail surrogate maximum value 489 */ 490 //private static final int TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF; 491 /** 492 * Number of trail surrogate 493 */ 494 private static final int TRAIL_SURROGATE_COUNT_ = 0x400; 495 /** 496 * Number of stage 1 indexes for supplementary calculations that maps to 497 * each lead surrogate character. 498 * See second pass into getRawOffset for the trail surrogate character. 499 * 10 for significant number of bits for trail surrogates, 5 for what we 500 * discard during shifting. 501 */ 502 private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ = 503 1 << (10 - Trie.INDEX_STAGE_1_SHIFT_); 504 /** 505 * Number of data values in a stage 2 (data array) block. 506 */ 507 private static final int DATA_BLOCK_LENGTH_ = 508 1 << Trie.INDEX_STAGE_1_SHIFT_; 509// /** 510// * Number of codepoints in a stage 2 block 511// */ 512// private static final int DATA_BLOCK_SUPPLEMENTARY_LENGTH_ = 513// DATA_BLOCK_LENGTH_ << 10; 514 /** 515 * Trie instance 516 */ 517 private Trie m_trie_; 518 /** 519 * Initial value for trie values 520 */ 521 private int m_initialValue_; 522 /** 523 * Next element results and data. 524 */ 525 private int m_currentCodepoint_; 526 private int m_nextCodepoint_; 527 private int m_nextValue_; 528 private int m_nextIndex_; 529 private int m_nextBlock_; 530 private int m_nextBlockIndex_; 531 private int m_nextTrailIndexOffset_; 532} 533