1/* GENERATED SOURCE. DO NOT MODIFY. */ 2// © 2016 and later: Unicode, Inc. and others. 3// License & terms of use: http://www.unicode.org/copyright.html#License 4/* 5******************************************************************************* 6* Copyright (C) 2010-2014, International Business Machines 7* Corporation and others. All Rights Reserved. 8******************************************************************************* 9* created on: 2010nov23 10* created by: Markus W. Scherer 11* ported from ICU4C bytestrie.h/.cpp 12*/ 13package android.icu.util; 14 15import java.io.IOException; 16import java.nio.ByteBuffer; 17import java.util.ArrayList; 18import java.util.NoSuchElementException; 19 20/** 21 * Light-weight, non-const reader class for a BytesTrie. 22 * Traverses a byte-serialized data structure with minimal state, 23 * for mapping byte sequences to non-negative integer values. 24 * 25 * <p>This class is not intended for public subclassing. 26 * 27 * @author Markus W. Scherer 28 * @hide Only a subset of ICU is exposed in Android 29 */ 30public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> { 31 /** 32 * Constructs a BytesTrie reader instance. 33 * 34 * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder, 35 * with the offset indicating the first byte of that sequence. 36 * The BytesTrie object will not read more bytes than 37 * the BytesTrieBuilder generated in the corresponding build() call. 38 * 39 * <p>The array is not copied/cloned and must not be modified while 40 * the BytesTrie object is in use. 41 * 42 * @param trieBytes Bytes array that contains the serialized trie. 43 * @param offset Root offset of the trie in the array. 44 */ 45 public BytesTrie(byte[] trieBytes, int offset) { 46 bytes_=trieBytes; 47 pos_=root_=offset; 48 remainingMatchLength_=-1; 49 } 50 51 /** 52 * Clones this trie reader object and its state, 53 * but not the byte array which will be shared. 54 * @return A shallow clone of this trie. 55 */ 56 @Override 57 public Object clone() throws CloneNotSupportedException { 58 return super.clone(); // A shallow copy is just what we need. 59 } 60 61 /** 62 * Resets this trie to its initial state. 63 * @return this 64 */ 65 public BytesTrie reset() { 66 pos_=root_; 67 remainingMatchLength_=-1; 68 return this; 69 } 70 71 /** 72 * BytesTrie state object, for saving a trie's current state 73 * and resetting the trie back to this state later. 74 */ 75 public static final class State { 76 /** 77 * Constructs an empty State. 78 */ 79 public State() {} 80 private byte[] bytes; 81 private int root; 82 private int pos; 83 private int remainingMatchLength; 84 } 85 86 /** 87 * Saves the state of this trie. 88 * @param state The State object to hold the trie's state. 89 * @return this 90 * @see #resetToState 91 */ 92 public BytesTrie saveState(State state) /*const*/ { 93 state.bytes=bytes_; 94 state.root=root_; 95 state.pos=pos_; 96 state.remainingMatchLength=remainingMatchLength_; 97 return this; 98 } 99 100 /** 101 * Resets this trie to the saved state. 102 * @param state The State object which holds a saved trie state. 103 * @return this 104 * @throws IllegalArgumentException if the state object contains no state, 105 * or the state of a different trie 106 * @see #saveState 107 * @see #reset 108 */ 109 public BytesTrie resetToState(State state) { 110 if(bytes_==state.bytes && bytes_!=null && root_==state.root) { 111 pos_=state.pos; 112 remainingMatchLength_=state.remainingMatchLength; 113 } else { 114 throw new IllegalArgumentException("incompatible trie state"); 115 } 116 return this; 117 } 118 119 /** 120 * Return values for BytesTrie.next(), CharsTrie.next() and similar methods. 121 */ 122 public enum Result { 123 /** 124 * The input unit(s) did not continue a matching string. 125 * Once current()/next() return NO_MATCH, 126 * all further calls to current()/next() will also return NO_MATCH, 127 * until the trie is reset to its original state or to a saved state. 128 */ 129 NO_MATCH, 130 /** 131 * The input unit(s) continued a matching string 132 * but there is no value for the string so far. 133 * (It is a prefix of a longer string.) 134 */ 135 NO_VALUE, 136 /** 137 * The input unit(s) continued a matching string 138 * and there is a value for the string so far. 139 * This value will be returned by getValue(). 140 * No further input byte/unit can continue a matching string. 141 */ 142 FINAL_VALUE, 143 /** 144 * The input unit(s) continued a matching string 145 * and there is a value for the string so far. 146 * This value will be returned by getValue(). 147 * Another input byte/unit can continue a matching string. 148 */ 149 INTERMEDIATE_VALUE; 150 151 // Note: The following methods assume the particular order 152 // of enum constants, treating the ordinal() values like bit sets. 153 // Do not reorder the enum constants! 154 155 /** 156 * Same as (result!=NO_MATCH). 157 * @return true if the input bytes/units so far are part of a matching string/byte sequence. 158 */ 159 public boolean matches() { return this!=NO_MATCH; } 160 161 /** 162 * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE). 163 * @return true if there is a value for the input bytes/units so far. 164 * @see #getValue 165 */ 166 public boolean hasValue() { return ordinal()>=2; } 167 168 /** 169 * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE). 170 * @return true if another input byte/unit can continue a matching string. 171 */ 172 public boolean hasNext() { return (ordinal()&1)!=0; } 173 } 174 175 /** 176 * Determines whether the byte sequence so far matches, whether it has a value, 177 * and whether another input byte can continue a matching byte sequence. 178 * @return The match/value Result. 179 */ 180 public Result current() /*const*/ { 181 int pos=pos_; 182 if(pos<0) { 183 return Result.NO_MATCH; 184 } else { 185 int node; 186 return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ? 187 valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 188 } 189 } 190 191 /** 192 * Traverses the trie from the initial state for this input byte. 193 * Equivalent to reset().next(inByte). 194 * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. 195 * Values below -0x100 and above 0xff will never match. 196 * @return The match/value Result. 197 */ 198 public Result first(int inByte) { 199 remainingMatchLength_=-1; 200 if(inByte<0) { 201 inByte+=0x100; 202 } 203 return nextImpl(root_, inByte); 204 } 205 206 /** 207 * Traverses the trie from the current state for this input byte. 208 * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. 209 * Values below -0x100 and above 0xff will never match. 210 * @return The match/value Result. 211 */ 212 public Result next(int inByte) { 213 int pos=pos_; 214 if(pos<0) { 215 return Result.NO_MATCH; 216 } 217 if(inByte<0) { 218 inByte+=0x100; 219 } 220 int length=remainingMatchLength_; // Actual remaining match length minus 1. 221 if(length>=0) { 222 // Remaining part of a linear-match node. 223 if(inByte==(bytes_[pos++]&0xff)) { 224 remainingMatchLength_=--length; 225 pos_=pos; 226 int node; 227 return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ? 228 valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 229 } else { 230 stop(); 231 return Result.NO_MATCH; 232 } 233 } 234 return nextImpl(pos, inByte); 235 } 236 237 /** 238 * Traverses the trie from the current state for this byte sequence. 239 * Equivalent to 240 * <pre> 241 * Result result=current(); 242 * for(each c in s) 243 * if(!result.hasNext()) return Result.NO_MATCH; 244 * result=next(c); 245 * return result; 246 * </pre> 247 * @param s Contains a string or byte sequence. 248 * @param sIndex The start index of the byte sequence in s. 249 * @param sLimit The (exclusive) end index of the byte sequence in s. 250 * @return The match/value Result. 251 */ 252 public Result next(byte[] s, int sIndex, int sLimit) { 253 if(sIndex>=sLimit) { 254 // Empty input. 255 return current(); 256 } 257 int pos=pos_; 258 if(pos<0) { 259 return Result.NO_MATCH; 260 } 261 int length=remainingMatchLength_; // Actual remaining match length minus 1. 262 for(;;) { 263 // Fetch the next input byte, if there is one. 264 // Continue a linear-match node. 265 byte inByte; 266 for(;;) { 267 if(sIndex==sLimit) { 268 remainingMatchLength_=length; 269 pos_=pos; 270 int node; 271 return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ? 272 valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 273 } 274 inByte=s[sIndex++]; 275 if(length<0) { 276 remainingMatchLength_=length; 277 break; 278 } 279 if(inByte!=bytes_[pos]) { 280 stop(); 281 return Result.NO_MATCH; 282 } 283 ++pos; 284 --length; 285 } 286 for(;;) { 287 int node=bytes_[pos++]&0xff; 288 if(node<kMinLinearMatch) { 289 Result result=branchNext(pos, node, inByte&0xff); 290 if(result==Result.NO_MATCH) { 291 return Result.NO_MATCH; 292 } 293 // Fetch the next input byte, if there is one. 294 if(sIndex==sLimit) { 295 return result; 296 } 297 if(result==Result.FINAL_VALUE) { 298 // No further matching bytes. 299 stop(); 300 return Result.NO_MATCH; 301 } 302 inByte=s[sIndex++]; 303 pos=pos_; // branchNext() advanced pos and wrote it to pos_ . 304 } else if(node<kMinValueLead) { 305 // Match length+1 bytes. 306 length=node-kMinLinearMatch; // Actual match length minus 1. 307 if(inByte!=bytes_[pos]) { 308 stop(); 309 return Result.NO_MATCH; 310 } 311 ++pos; 312 --length; 313 break; 314 } else if((node&kValueIsFinal)!=0) { 315 // No further matching bytes. 316 stop(); 317 return Result.NO_MATCH; 318 } else { 319 // Skip intermediate value. 320 pos=skipValue(pos, node); 321 // The next node must not also be a value node. 322 assert((bytes_[pos]&0xff)<kMinValueLead); 323 } 324 } 325 } 326 } 327 328 /** 329 * Returns a matching byte sequence's value if called immediately after 330 * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE. 331 * getValue() can be called multiple times. 332 * 333 * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE! 334 * @return The value for the byte sequence so far. 335 */ 336 public int getValue() /*const*/ { 337 int pos=pos_; 338 int leadByte=bytes_[pos++]&0xff; 339 assert(leadByte>=kMinValueLead); 340 return readValue(bytes_, pos, leadByte>>1); 341 } 342 343 /** 344 * Determines whether all byte sequences reachable from the current state 345 * map to the same value, and if so, returns that value. 346 * @return The unique value in bits 32..1 with bit 0 set, 347 * if all byte sequences reachable from the current state 348 * map to the same value; otherwise returns 0. 349 */ 350 public long getUniqueValue() /*const*/ { 351 int pos=pos_; 352 if(pos<0) { 353 return 0; 354 } 355 // Skip the rest of a pending linear-match node. 356 long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0); 357 // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32. 358 return (uniqueValue<<31)>>31; 359 } 360 361 /** 362 * Finds each byte which continues the byte sequence from the current state. 363 * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now. 364 * @param out Each next byte is 0-extended to a char and appended to this object. 365 * (Only uses the out.append(c) method.) 366 * @return The number of bytes which continue the byte sequence from here. 367 */ 368 public int getNextBytes(Appendable out) /*const*/ { 369 int pos=pos_; 370 if(pos<0) { 371 return 0; 372 } 373 if(remainingMatchLength_>=0) { 374 append(out, bytes_[pos]&0xff); // Next byte of a pending linear-match node. 375 return 1; 376 } 377 int node=bytes_[pos++]&0xff; 378 if(node>=kMinValueLead) { 379 if((node&kValueIsFinal)!=0) { 380 return 0; 381 } else { 382 pos=skipValue(pos, node); 383 node=bytes_[pos++]&0xff; 384 assert(node<kMinValueLead); 385 } 386 } 387 if(node<kMinLinearMatch) { 388 if(node==0) { 389 node=bytes_[pos++]&0xff; 390 } 391 getNextBranchBytes(bytes_, pos, ++node, out); 392 return node; 393 } else { 394 // First byte of the linear-match node. 395 append(out, bytes_[pos]&0xff); 396 return 1; 397 } 398 } 399 400 /** 401 * Iterates from the current state of this trie. 402 * @return A new BytesTrie.Iterator. 403 */ 404 @Override 405 public Iterator iterator() { 406 return new Iterator(bytes_, pos_, remainingMatchLength_, 0); 407 } 408 409 /** 410 * Iterates from the current state of this trie. 411 * @param maxStringLength If 0, the iterator returns full strings/byte sequences. 412 * Otherwise, the iterator returns strings with this maximum length. 413 * @return A new BytesTrie.Iterator. 414 */ 415 public Iterator iterator(int maxStringLength) { 416 return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength); 417 } 418 419 /** 420 * Iterates from the root of a byte-serialized BytesTrie. 421 * @param trieBytes Bytes array that contains the serialized trie. 422 * @param offset Root offset of the trie in the array. 423 * @param maxStringLength If 0, the iterator returns full strings/byte sequences. 424 * Otherwise, the iterator returns strings with this maximum length. 425 * @return A new BytesTrie.Iterator. 426 */ 427 public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) { 428 return new Iterator(trieBytes, offset, -1, maxStringLength); 429 } 430 431 /** 432 * Return value type for the Iterator. 433 */ 434 public static final class Entry { 435 private Entry(int capacity) { 436 bytes=new byte[capacity]; 437 } 438 439 /** 440 * @return The length of the byte sequence. 441 */ 442 public int bytesLength() { return length; } 443 /** 444 * Returns a byte of the byte sequence. 445 * @param index An index into the byte sequence. 446 * @return The index-th byte sequence byte. 447 */ 448 public byte byteAt(int index) { return bytes[index]; } 449 /** 450 * Copies the byte sequence into a byte array. 451 * @param dest Destination byte array. 452 * @param destOffset Starting offset to where in dest the byte sequence is copied. 453 */ 454 public void copyBytesTo(byte[] dest, int destOffset) { 455 System.arraycopy(bytes, 0, dest, destOffset, length); 456 } 457 /** 458 * @return The byte sequence as a read-only ByteBuffer. 459 */ 460 public ByteBuffer bytesAsByteBuffer() { 461 return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer(); 462 } 463 464 /** 465 * The value associated with the byte sequence. 466 */ 467 public int value; 468 469 private void ensureCapacity(int len) { 470 if(bytes.length<len) { 471 byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)]; 472 System.arraycopy(bytes, 0, newBytes, 0, length); 473 bytes=newBytes; 474 } 475 } 476 private void append(byte b) { 477 ensureCapacity(length+1); 478 bytes[length++]=b; 479 } 480 private void append(byte[] b, int off, int len) { 481 ensureCapacity(length+len); 482 System.arraycopy(b, off, bytes, length, len); 483 length+=len; 484 } 485 private void truncateString(int newLength) { length=newLength; } 486 487 private byte[] bytes; 488 private int length; 489 } 490 491 /** 492 * Iterator for all of the (byte sequence, value) pairs in a BytesTrie. 493 */ 494 public static final class Iterator implements java.util.Iterator<Entry> { 495 private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) { 496 bytes_=trieBytes; 497 pos_=initialPos_=offset; 498 remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength; 499 maxLength_=maxStringLength; 500 entry_=new Entry(maxLength_!=0 ? maxLength_ : 32); 501 int length=remainingMatchLength_; // Actual remaining match length minus 1. 502 if(length>=0) { 503 // Pending linear-match node, append remaining bytes to entry_. 504 ++length; 505 if(maxLength_>0 && length>maxLength_) { 506 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 507 } 508 entry_.append(bytes_, pos_, length); 509 pos_+=length; 510 remainingMatchLength_-=length; 511 } 512 } 513 514 /** 515 * Resets this iterator to its initial state. 516 * @return this 517 */ 518 public Iterator reset() { 519 pos_=initialPos_; 520 remainingMatchLength_=initialRemainingMatchLength_; 521 int length=remainingMatchLength_+1; // Remaining match length. 522 if(maxLength_>0 && length>maxLength_) { 523 length=maxLength_; 524 } 525 entry_.truncateString(length); 526 pos_+=length; 527 remainingMatchLength_-=length; 528 stack_.clear(); 529 return this; 530 } 531 532 /** 533 * @return true if there are more elements. 534 */ 535 @Override 536 public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); } 537 538 /** 539 * Finds the next (byte sequence, value) pair if there is one. 540 * 541 * If the byte sequence is truncated to the maximum length and does not 542 * have a real value, then the value is set to -1. 543 * In this case, this "not a real value" is indistinguishable from 544 * a real value of -1. 545 * @return An Entry with the string and value of the next element. 546 * @throws NoSuchElementException - iteration has no more elements. 547 */ 548 @Override 549 public Entry next() { 550 int pos=pos_; 551 if(pos<0) { 552 if(stack_.isEmpty()) { 553 throw new NoSuchElementException(); 554 } 555 // Pop the state off the stack and continue with the next outbound edge of 556 // the branch node. 557 long top=stack_.remove(stack_.size()-1); 558 int length=(int)top; 559 pos=(int)(top>>32); 560 entry_.truncateString(length&0xffff); 561 length>>>=16; 562 if(length>1) { 563 pos=branchNext(pos, length); 564 if(pos<0) { 565 return entry_; // Reached a final value. 566 } 567 } else { 568 entry_.append(bytes_[pos++]); 569 } 570 } 571 if(remainingMatchLength_>=0) { 572 // We only get here if we started in a pending linear-match node 573 // with more than maxLength remaining bytes. 574 return truncateAndStop(); 575 } 576 for(;;) { 577 int node=bytes_[pos++]&0xff; 578 if(node>=kMinValueLead) { 579 // Deliver value for the byte sequence so far. 580 boolean isFinal=(node&kValueIsFinal)!=0; 581 entry_.value=readValue(bytes_, pos, node>>1); 582 if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) { 583 pos_=-1; 584 } else { 585 pos_=skipValue(pos, node); 586 } 587 return entry_; 588 } 589 if(maxLength_>0 && entry_.length==maxLength_) { 590 return truncateAndStop(); 591 } 592 if(node<kMinLinearMatch) { 593 if(node==0) { 594 node=bytes_[pos++]&0xff; 595 } 596 pos=branchNext(pos, node+1); 597 if(pos<0) { 598 return entry_; // Reached a final value. 599 } 600 } else { 601 // Linear-match node, append length bytes to entry_. 602 int length=node-kMinLinearMatch+1; 603 if(maxLength_>0 && entry_.length+length>maxLength_) { 604 entry_.append(bytes_, pos, maxLength_-entry_.length); 605 return truncateAndStop(); 606 } 607 entry_.append(bytes_, pos, length); 608 pos+=length; 609 } 610 } 611 } 612 613 /** 614 * Iterator.remove() is not supported. 615 * @throws UnsupportedOperationException (always) 616 */ 617 @Override 618 public void remove() { 619 throw new UnsupportedOperationException(); 620 } 621 622 private Entry truncateAndStop() { 623 pos_=-1; 624 entry_.value=-1; // no real value for str 625 return entry_; 626 } 627 628 private int branchNext(int pos, int length) { 629 while(length>kMaxBranchLinearSubNodeLength) { 630 ++pos; // ignore the comparison byte 631 // Push state for the greater-or-equal edge. 632 stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length); 633 // Follow the less-than edge. 634 length>>=1; 635 pos=jumpByDelta(bytes_, pos); 636 } 637 // List of key-value pairs where values are either final values or jump deltas. 638 // Read the first (key, value) pair. 639 byte trieByte=bytes_[pos++]; 640 int node=bytes_[pos++]&0xff; 641 boolean isFinal=(node&kValueIsFinal)!=0; 642 int value=readValue(bytes_, pos, node>>1); 643 pos=skipValue(pos, node); 644 stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length); 645 entry_.append(trieByte); 646 if(isFinal) { 647 pos_=-1; 648 entry_.value=value; 649 return -1; 650 } else { 651 return pos+value; 652 } 653 } 654 655 private byte[] bytes_; 656 private int pos_; 657 private int initialPos_; 658 private int remainingMatchLength_; 659 private int initialRemainingMatchLength_; 660 661 private int maxLength_; 662 private Entry entry_; 663 664 // The stack stores longs for backtracking to another 665 // outbound edge of a branch node. 666 // Each long has the offset from bytes_ in bits 62..32, 667 // the entry_.stringLength() from before the node in bits 15..0, 668 // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.) 669 // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24, 670 // but the code looks more confusing that way.) 671 private ArrayList<Long> stack_=new ArrayList<Long>(); 672 } 673 674 private void stop() { 675 pos_=-1; 676 } 677 678 // Reads a compact 32-bit integer. 679 // pos is already after the leadByte, and the lead byte is already shifted right by 1. 680 private static int readValue(byte[] bytes, int pos, int leadByte) { 681 int value; 682 if(leadByte<kMinTwoByteValueLead) { 683 value=leadByte-kMinOneByteValueLead; 684 } else if(leadByte<kMinThreeByteValueLead) { 685 value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff); 686 } else if(leadByte<kFourByteValueLead) { 687 value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff); 688 } else if(leadByte==kFourByteValueLead) { 689 value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff); 690 } else { 691 value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff); 692 } 693 return value; 694 } 695 private static int skipValue(int pos, int leadByte) { 696 assert(leadByte>=kMinValueLead); 697 if(leadByte>=(kMinTwoByteValueLead<<1)) { 698 if(leadByte<(kMinThreeByteValueLead<<1)) { 699 ++pos; 700 } else if(leadByte<(kFourByteValueLead<<1)) { 701 pos+=2; 702 } else { 703 pos+=3+((leadByte>>1)&1); 704 } 705 } 706 return pos; 707 } 708 private static int skipValue(byte[] bytes, int pos) { 709 int leadByte=bytes[pos++]&0xff; 710 return skipValue(pos, leadByte); 711 } 712 713 // Reads a jump delta and jumps. 714 private static int jumpByDelta(byte[] bytes, int pos) { 715 int delta=bytes[pos++]&0xff; 716 if(delta<kMinTwoByteDeltaLead) { 717 // nothing to do 718 } else if(delta<kMinThreeByteDeltaLead) { 719 delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff); 720 } else if(delta<kFourByteDeltaLead) { 721 delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff); 722 pos+=2; 723 } else if(delta==kFourByteDeltaLead) { 724 delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff); 725 pos+=3; 726 } else { 727 delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff); 728 pos+=4; 729 } 730 return pos+delta; 731 } 732 733 private static int skipDelta(byte[] bytes, int pos) { 734 int delta=bytes[pos++]&0xff; 735 if(delta>=kMinTwoByteDeltaLead) { 736 if(delta<kMinThreeByteDeltaLead) { 737 ++pos; 738 } else if(delta<kFourByteDeltaLead) { 739 pos+=2; 740 } else { 741 pos+=3+(delta&1); 742 } 743 } 744 return pos; 745 } 746 747 private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE }; 748 749 // Handles a branch node for both next(byte) and next(string). 750 private Result branchNext(int pos, int length, int inByte) { 751 // Branch according to the current byte. 752 if(length==0) { 753 length=bytes_[pos++]&0xff; 754 } 755 ++length; 756 // The length of the branch is the number of bytes to select from. 757 // The data structure encodes a binary search. 758 while(length>kMaxBranchLinearSubNodeLength) { 759 if(inByte<(bytes_[pos++]&0xff)) { 760 length>>=1; 761 pos=jumpByDelta(bytes_, pos); 762 } else { 763 length=length-(length>>1); 764 pos=skipDelta(bytes_, pos); 765 } 766 } 767 // Drop down to linear search for the last few bytes. 768 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 769 // and divides length by 2. 770 do { 771 if(inByte==(bytes_[pos++]&0xff)) { 772 Result result; 773 int node=bytes_[pos]&0xff; 774 assert(node>=kMinValueLead); 775 if((node&kValueIsFinal)!=0) { 776 // Leave the final value for getValue() to read. 777 result=Result.FINAL_VALUE; 778 } else { 779 // Use the non-final value as the jump delta. 780 ++pos; 781 // int delta=readValue(pos, node>>1); 782 node>>=1; 783 int delta; 784 if(node<kMinTwoByteValueLead) { 785 delta=node-kMinOneByteValueLead; 786 } else if(node<kMinThreeByteValueLead) { 787 delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff); 788 } else if(node<kFourByteValueLead) { 789 delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff); 790 pos+=2; 791 } else if(node==kFourByteValueLead) { 792 delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff); 793 pos+=3; 794 } else { 795 delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff); 796 pos+=4; 797 } 798 // end readValue() 799 pos+=delta; 800 node=bytes_[pos]&0xff; 801 result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 802 } 803 pos_=pos; 804 return result; 805 } 806 --length; 807 pos=skipValue(bytes_, pos); 808 } while(length>1); 809 if(inByte==(bytes_[pos++]&0xff)) { 810 pos_=pos; 811 int node=bytes_[pos]&0xff; 812 return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 813 } else { 814 stop(); 815 return Result.NO_MATCH; 816 } 817 } 818 819 // Requires remainingLength_<0. 820 private Result nextImpl(int pos, int inByte) { 821 for(;;) { 822 int node=bytes_[pos++]&0xff; 823 if(node<kMinLinearMatch) { 824 return branchNext(pos, node, inByte); 825 } else if(node<kMinValueLead) { 826 // Match the first of length+1 bytes. 827 int length=node-kMinLinearMatch; // Actual match length minus 1. 828 if(inByte==(bytes_[pos++]&0xff)) { 829 remainingMatchLength_=--length; 830 pos_=pos; 831 return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ? 832 valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 833 } else { 834 // No match. 835 break; 836 } 837 } else if((node&kValueIsFinal)!=0) { 838 // No further matching bytes. 839 break; 840 } else { 841 // Skip intermediate value. 842 pos=skipValue(pos, node); 843 // The next node must not also be a value node. 844 assert((bytes_[pos]&0xff)<kMinValueLead); 845 } 846 } 847 stop(); 848 return Result.NO_MATCH; 849 } 850 851 // Helper functions for getUniqueValue(). 852 // Recursively finds a unique value (or whether there is not a unique one) 853 // from a branch. 854 // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue(). 855 // On return, if not 0, then bits 63..33 contain the updated non-negative pos. 856 private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length, 857 long uniqueValue) { 858 while(length>kMaxBranchLinearSubNodeLength) { 859 ++pos; // ignore the comparison byte 860 uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue); 861 if(uniqueValue==0) { 862 return 0; 863 } 864 length=length-(length>>1); 865 pos=skipDelta(bytes, pos); 866 } 867 do { 868 ++pos; // ignore a comparison byte 869 // handle its value 870 int node=bytes[pos++]&0xff; 871 boolean isFinal=(node&kValueIsFinal)!=0; 872 int value=readValue(bytes, pos, node>>1); 873 pos=skipValue(pos, node); 874 if(isFinal) { 875 if(uniqueValue!=0) { 876 if(value!=(int)(uniqueValue>>1)) { 877 return 0; 878 } 879 } else { 880 uniqueValue=((long)value<<1)|1; 881 } 882 } else { 883 uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue); 884 if(uniqueValue==0) { 885 return 0; 886 } 887 } 888 } while(--length>1); 889 // ignore the last comparison byte 890 return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL); 891 } 892 // Recursively finds a unique value (or whether there is not a unique one) 893 // starting from a position on a node lead byte. 894 // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set. 895 // Otherwise, uniqueValue is 0. Bits 63..33 are ignored. 896 private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) { 897 for(;;) { 898 int node=bytes[pos++]&0xff; 899 if(node<kMinLinearMatch) { 900 if(node==0) { 901 node=bytes[pos++]&0xff; 902 } 903 uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue); 904 if(uniqueValue==0) { 905 return 0; 906 } 907 pos=(int)(uniqueValue>>>33); 908 } else if(node<kMinValueLead) { 909 // linear-match node 910 pos+=node-kMinLinearMatch+1; // Ignore the match bytes. 911 } else { 912 boolean isFinal=(node&kValueIsFinal)!=0; 913 int value=readValue(bytes, pos, node>>1); 914 if(uniqueValue!=0) { 915 if(value!=(int)(uniqueValue>>1)) { 916 return 0; 917 } 918 } else { 919 uniqueValue=((long)value<<1)|1; 920 } 921 if(isFinal) { 922 return uniqueValue; 923 } 924 pos=skipValue(pos, node); 925 } 926 } 927 } 928 929 // Helper functions for getNextBytes(). 930 // getNextBytes() when pos is on a branch node. 931 private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) { 932 while(length>kMaxBranchLinearSubNodeLength) { 933 ++pos; // ignore the comparison byte 934 getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out); 935 length=length-(length>>1); 936 pos=skipDelta(bytes, pos); 937 } 938 do { 939 append(out, bytes[pos++]&0xff); 940 pos=skipValue(bytes, pos); 941 } while(--length>1); 942 append(out, bytes[pos]&0xff); 943 } 944 private static void append(Appendable out, int c) { 945 try { 946 out.append((char)c); 947 } catch(IOException e) { 948 throw new ICUUncheckedIOException(e); 949 } 950 } 951 952 // BytesTrie data structure 953 // 954 // The trie consists of a series of byte-serialized nodes for incremental 955 // string/byte sequence matching. The root node is at the beginning of the trie data. 956 // 957 // Types of nodes are distinguished by their node lead byte ranges. 958 // After each node, except a final-value node, another node follows to 959 // encode match values or continue matching further bytes. 960 // 961 // Node types: 962 // - Value node: Stores a 32-bit integer in a compact, variable-length format. 963 // The value is for the string/byte sequence so far. 964 // One node bit indicates whether the value is final or whether 965 // matching continues with the next node. 966 // - Linear-match node: Matches a number of bytes. 967 // - Branch node: Branches to other nodes according to the current input byte. 968 // The node byte is the length of the branch (number of bytes to select from) 969 // minus 1. It is followed by a sub-node: 970 // - If the length is at most kMaxBranchLinearSubNodeLength, then 971 // there are length-1 (key, value) pairs and then one more comparison byte. 972 // If one of the key bytes matches, then the value is either a final value for 973 // the string/byte sequence so far, or a "jump" delta to the next node. 974 // If the last byte matches, then matching continues with the next node. 975 // (Values have the same encoding as value nodes.) 976 // - If the length is greater than kMaxBranchLinearSubNodeLength, then 977 // there is one byte and one "jump" delta. 978 // If the input byte is less than the sub-node byte, then "jump" by delta to 979 // the next sub-node which will have a length of length/2. 980 // (The delta has its own compact encoding.) 981 // Otherwise, skip the "jump" delta to the next sub-node 982 // which will have a length of length-length/2. 983 984 // Node lead byte values. 985 986 // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise 987 // the length is one more than the next byte. 988 989 // For a branch sub-node with at most this many entries, we drop down 990 // to a linear search. 991 /*package*/ static final int kMaxBranchLinearSubNodeLength=5; 992 993 // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node. 994 /*package*/ static final int kMinLinearMatch=0x10; 995 /*package*/ static final int kMaxLinearMatchLength=0x10; 996 997 // 20..ff: Variable-length value node. 998 // If odd, the value is final. (Otherwise, intermediate value or jump delta.) 999 // Then shift-right by 1 bit. 1000 // The remaining lead byte value indicates the number of following bytes (0..4) 1001 // and contains the value's top bits. 1002 /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x20 1003 // It is a final value if bit 0 is set. 1004 private static final int kValueIsFinal=1; 1005 1006 // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds. 1007 /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2; // 0x10 1008 /*package*/ static final int kMaxOneByteValue=0x40; // At least 6 bits in the first byte. 1009 1010 /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1; // 0x51 1011 /*package*/ static final int kMaxTwoByteValue=0x1aff; 1012 1013 /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1; // 0x6c 1014 /*package*/ static final int kFourByteValueLead=0x7e; 1015 1016 // A little more than Unicode code points. (0x11ffff) 1017 /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1; 1018 1019 /*package*/ static final int kFiveByteValueLead=0x7f; 1020 1021 // Compact delta integers. 1022 /*package*/ static final int kMaxOneByteDelta=0xbf; 1023 /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0 1024 /*package*/ static final int kMinThreeByteDeltaLead=0xf0; 1025 /*package*/ static final int kFourByteDeltaLead=0xfe; 1026 /*package*/ static final int kFiveByteDeltaLead=0xff; 1027 1028 /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff 1029 /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff 1030 1031 // Fixed value referencing the BytesTrie bytes. 1032 private byte[] bytes_; 1033 private int root_; 1034 1035 // Iterator variables. 1036 1037 // Index of next trie byte to read. Negative if no more matches. 1038 private int pos_; 1039 // Remaining length of a linear-match node, minus 1. Negative if not in such a node. 1040 private int remainingMatchLength_; 1041}; 1042