12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************* 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Copyright (C) 2010-2014, International Business Machines 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Corporation and others. All Rights Reserved. 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* CollationIterator.java, ported from collationiterator.h/.cpp 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C++ version created on: 2010oct27 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* created by: Markus W. Scherer 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/ 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl.coll; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Normalizer2Impl.Hangul; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Trie2_32; 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.BytesTrie; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.CharsTrie; 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUException; 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/** 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Collation element iterator and abstract character iterator. 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * When a method returns a code point value, it must be in 0..10FFFF, 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * except it can be negative as a sentinel value. 28836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic abstract class CollationIterator { 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final class CEBuffer { 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Large enough for CEs of most short strings. */ 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int INITIAL_CAPACITY = 40; 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CEBuffer() {} 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void append(long ce) { 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length >= INITIAL_CAPACITY) { 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ensureAppendCapacity(1); 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[length++] = ce; 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void appendUnsafe(long ce) { 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[length++] = ce; 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void ensureAppendCapacity(int appCap) { 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int capacity = buffer.length; 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((length + appCap) <= capacity) { return; } 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(capacity < 1000) { 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller capacity *= 4; 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller capacity *= 2; 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while(capacity < (length + appCap)); 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] newBuffer = new long[capacity]; 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(buffer, 0, newBuffer, 0, length); 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer = newBuffer; 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void incLength() { 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Use INITIAL_CAPACITY for a very simple fastpath. 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (Rather than buffer.getCapacity().) 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length >= INITIAL_CAPACITY) { 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ensureAppendCapacity(1); 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++length; 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long set(int i, long ce) { 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer[i] = ce; 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long get(int i) { return buffer[i]; } 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] getCEs() { return buffer; } 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = 0; 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private long[] buffer = new long[INITIAL_CAPACITY]; 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // State of combining marks skipped in discontiguous contraction. 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We create a state object on first use and keep it around deactivated between uses. 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final class SkippedState { 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Born active but empty. 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SkippedState() {} 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void clear() { 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldBuffer.setLength(0); 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = 0; 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The newBuffer is reset by setFirstSkipped(). 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isEmpty() { return oldBuffer.length() == 0; } 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean hasNext() { return pos < oldBuffer.length(); } 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Requires hasNext(). 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int next() { 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = oldBuffer.codePointAt(pos); 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos += Character.charCount(c); 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c; 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Accounts for one more input code point read beyond the end of the marks buffer. 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void incBeyond() { 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(!hasNext()); 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++pos; 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Goes backward through the skipped-marks buffer. 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Returns the number of code points read beyond the skipped marks 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // that need to be backtracked through normal input. 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int backwardNumCodePoints(int n) { 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = oldBuffer.length(); 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int beyond = pos - length; 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(beyond > 0) { 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(beyond >= n) { 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Not back far enough to re-enter the oldBuffer. 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos -= n; 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return n; 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Back out all beyond-oldBuffer code points and re-enter the buffer. 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = oldBuffer.offsetByCodePoints(length, beyond - n); 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return beyond; 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Go backwards from inside the oldBuffer. 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = oldBuffer.offsetByCodePoints(pos, -n); 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void setFirstSkipped(int c) { 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipLengthAtMatch = 0; 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newBuffer.setLength(0); 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newBuffer.appendCodePoint(c); 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void skip(int c) { 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newBuffer.appendCodePoint(c); 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void recordMatch() { skipLengthAtMatch = newBuffer.length(); } 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Replaces the characters we consumed with the newly skipped ones. 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void replaceMatch() { 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: UnicodeString.replace() pins pos to at most length(). 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int oldLength = oldBuffer.length(); 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(pos > oldLength) { pos = oldLength; } 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldBuffer.delete(0, pos).insert(0, newBuffer, 0, skipLengthAtMatch); 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = 0; 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void saveTrieState(CharsTrie trie) { trie.saveState(state); } 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void resetToTrieState(CharsTrie trie) { trie.resetToState(state); } 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Combining marks skipped in previous discontiguous-contraction matching. 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // After that discontiguous contraction was completed, we start reading them from here. 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final StringBuilder oldBuffer = new StringBuilder(); 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Combining marks newly skipped in current discontiguous-contraction matching. 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // These might have been read from the normal text or from the oldBuffer. 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final StringBuilder newBuffer = new StringBuilder(); 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reading index in oldBuffer, 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // or counter for how many code points have been read beyond oldBuffer (pos-oldBuffer.length()). 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int pos; 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // newBuffer.length() at the time of the last matching character. 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // When a partial match fails, we back out skipped and partial-matching input characters. 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int skipLengthAtMatch; 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We save the trie state before we attempt to match a character, 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so that we can skip it and try the next one. 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private CharsTrie.State state = new CharsTrie.State(); 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Partially constructs the iterator. 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * In Java, we cache partially constructed iterators 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and finish their setup when starting to work on text 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (via reset(boolean) and the setText(numeric, ...) methods of subclasses). 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This avoids memory allocations for iterators that remain unused. 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>In C++, there is only one constructor, and iterators are 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * stack-allocated as needed. 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public CollationIterator(CollationData d) { 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller trie = d.trie; 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller data = d; 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numCpFwd = -1; 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isNumeric = false; 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer = null; 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public CollationIterator(CollationData d, boolean numeric) { 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller trie = d.trie; 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller data = d; 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numCpFwd = -1; 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isNumeric = numeric; 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer = new CEBuffer(); 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Override 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean equals(Object other) { 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Subclasses: Call this method and then add more specific checks. 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Compare the iterator state but not the collation data (trie & data fields): 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Assume that the caller compares the data. 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Ignore skipped since that should be unused between calls to nextCE(). 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (It only stays around to avoid another memory allocation.) 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(other == null) { return false; } 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!this.getClass().equals(other.getClass())) { return false; } 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationIterator o = (CollationIterator)other; 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!(ceBuffer.length == o.ceBuffer.length && 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesIndex == o.cesIndex && 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numCpFwd == o.numCpFwd && 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isNumeric == o.isNumeric)) { 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < ceBuffer.length; ++i) { 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ceBuffer.get(i) != o.ceBuffer.get(i)) { return false; } 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 224f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 225f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert public int hashCode() { 226f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert // Dummy return to prevent compile warnings. 227f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return 0; 228f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 229f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Resets the iterator state and sets the position to the specified offset. 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Subclasses must implement, and must call the parent class method, 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or CollationIterator.reset(). 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public abstract void resetToOffset(int newOffset); 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public abstract int getOffset(); 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the next collation element. 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final long nextCE() { 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cesIndex < ceBuffer.length) { 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Return the next buffered CE. 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.get(cesIndex++); 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert cesIndex == ceBuffer.length; 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.incLength(); 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long cAndCE32 = handleNextCE32(); 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = (int)(cAndCE32 >> 32); 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ce32 = (int)cAndCE32; 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int t = ce32 & 0xff; 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(t < Collation.SPECIAL_CE32_LOW_BYTE) { // Forced-inline of isSpecialCE32(ce32). 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Normal CE from the main data. 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Forced-inline of ceFromSimpleCE32(ce32). 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.set(cesIndex++, 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (t << 8)); 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationData d; 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The compiler should be able to optimize the previous and the following 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // comparisons of t with the same constant. 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(t == Collation.SPECIAL_CE32_LOW_BYTE) { 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c < 0) { 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.set(cesIndex++, Collation.NO_CE); 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = data.base; 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.getCE32(c); 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t = ce32 & 0xff; 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(t < Collation.SPECIAL_CE32_LOW_BYTE) { 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Normal CE from the base data. 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.set(cesIndex++, 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (t << 8)); 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = data; 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(t == Collation.LONG_PRIMARY_CE32_LOW_BYTE) { 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Forced-inline of ceFromLongPrimaryCE32(ce32). 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.set(cesIndex++, 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((long)(ce32 - t) << 32) | Collation.COMMON_SEC_AND_TER_CE); 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return nextCEFromCE32(d, c, ce32); 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Fetches all CEs. 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return getCEsLength() 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int fetchCEs() { 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(nextCE() != Collation.NO_CE) { 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No need to loop for each expansion CE. 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesIndex = ceBuffer.length; 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.length; 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Overwrites the current CE (the last one returned by nextCE()). 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller final void setCurrentCE(long ce) { 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert cesIndex > 0; 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.set(cesIndex - 1, ce); 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the previous collation element. 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final long previousCE(UVector32 offsets) { 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ceBuffer.length > 0) { 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Return the previous buffered CE. 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.get(--ceBuffer.length); 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offsets.removeAllElements(); 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limitOffset = getOffset(); 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = previousCodePoint(); 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c < 0) { return Collation.NO_CE; } 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(data.isUnsafeBackward(c, isNumeric)) { 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return previousCEUnsafe(c, offsets); 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Simple, safe-backwards iteration: 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Get a CE going backwards, handle prefixes but no contractions. 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ce32 = data.getCE32(c); 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationData d; 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.FALLBACK_CE32) { 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = data.base; 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.getCE32(c); 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = data; 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Collation.isSimpleOrLongCE32(ce32)) { 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.ceFromCE32(ce32); 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendCEsFromCE32(d, c, ce32, false); 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ceBuffer.length > 1) { 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offsets.addElement(getOffset()); 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // For an expansion, the offset of each non-initial CE is the limit offset, 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // consistent with forward iteration. 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(offsets.size() <= ceBuffer.length) { 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offsets.addElement(limitOffset); 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.get(--ceBuffer.length); 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int getCEsLength() { 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.length; 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final long getCE(int i) { 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.get(i); 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final long[] getCEs() { 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.getCEs(); 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller final void clearCEs() { 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesIndex = ceBuffer.length = 0; 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final void clearCEsIfNoneRemaining() { 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cesIndex == ceBuffer.length) { clearCEs(); } 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the next code point (with post-increment). 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Public for identical-level comparison and for testing. 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public abstract int nextCodePoint(); 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the previous code point (with pre-decrement). 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Public for identical-level comparison and for testing. 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public abstract int previousCodePoint(); 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected final void reset() { 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesIndex = ceBuffer.length = 0; 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped != null) { skipped.clear(); } 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Resets the state as well as the numeric setting, 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and completes the initialization. 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Only exists in Java where we reset cached CollationIterator instances 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * rather than stack-allocating temporary ones. 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (See also the constructor comments.) 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected final void reset(boolean numeric) { 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ceBuffer == null) { 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer = new CEBuffer(); 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reset(); 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isNumeric = numeric; 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the next code point and its local CE32 value. 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns Collation.FALLBACK_CE32 at the end of the text (c<0) 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or when c's CE32 value is to be looked up in the base data (fallback). 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The code point is used for fallbacks, context and implicit weights. 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It is ignored when the returned CE32 is not special (e.g., FFFD_CE32). 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the code point in bits 63..32 (signed) and the CE32 in bits 31..0. 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected long handleNextCE32() { 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = nextCodePoint(); 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c < 0) { return NO_CP_AND_CE32; } 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return makeCodePointAndCE32Pair(c, data.getCE32(c)); 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected long makeCodePointAndCE32Pair(int c, int ce32) { 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((long)c << 32) | (ce32 & 0xffffffffL); 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected static final long NO_CP_AND_CE32 = (-1L << 32) | (Collation.FALLBACK_CE32 & 0xffffffffL); 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Called when handleNextCE32() returns a LEAD_SURROGATE_TAG for a lead surrogate code unit. 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the trail surrogate in that case and advances past it, 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if a trail surrogate follows the lead surrogate. 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Otherwise returns any other code unit and does not advance. 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected char handleGetTrailSurrogate() { 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Called when handleNextCE32() returns with c==0, to see whether it is a NUL terminator. 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (Not needed in Java.) 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*protected boolean foundNULTerminator() { 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }*/ 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return false if surrogate code points U+D800..U+DFFF 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * map to their own implicit primary weights (for UTF-16), 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or true if they map to CE(U+FFFD) (for UTF-8) 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected boolean forbidSurrogateCodePoints() { 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected abstract void forwardNumCodePoints(int num); 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected abstract void backwardNumCodePoints(int num); 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the CE32 from the data trie. 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Normally the same as data.getCE32(), but overridden in the builder. 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Call this only when the faster data.getCE32() cannot be used. 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected int getDataCE32(int c) { 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return data.getCE32(c); 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected int getCE32FromBuilderData(int ce32) { 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUException("internal program error: should be unreachable"); 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected final void appendCEsFromCE32(CollationData d, int c, int ce32, 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean forward) { 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(Collation.isSpecialCE32(ce32)) { 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch(Collation.tagFromCE32(ce32)) { 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.FALLBACK_TAG: 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.RESERVED_TAG_3: 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUException("internal program error: should be unreachable"); 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.LONG_PRIMARY_TAG: 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.ceFromLongPrimaryCE32(ce32)); 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.LONG_SECONDARY_TAG: 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.ceFromLongSecondaryCE32(ce32)); 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.LATIN_EXPANSION_TAG: 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.ensureAppendCapacity(2); 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.set(ceBuffer.length, Collation.latinCE0FromCE32(ce32)); 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.set(ceBuffer.length + 1, Collation.latinCE1FromCE32(ce32)); 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.length += 2; 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.EXPANSION32_TAG: { 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = Collation.indexFromCE32(ce32); 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = Collation.lengthFromCE32(ce32); 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.ensureAppendCapacity(length); 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.appendUnsafe(Collation.ceFromCE32(d.ce32s[index++])); 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while(--length > 0); 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.EXPANSION_TAG: { 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = Collation.indexFromCE32(ce32); 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = Collation.lengthFromCE32(ce32); 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.ensureAppendCapacity(length); 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.appendUnsafe(d.ces[index++]); 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while(--length > 0); 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.BUILDER_DATA_TAG: 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = getCE32FromBuilderData(ce32); 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.FALLBACK_CE32) { 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = data.base; 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.getCE32(c); 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.PREFIX_TAG: 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(forward) { backwardNumCodePoints(1); } 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = getCE32FromPrefix(d, ce32); 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(forward) { forwardNumCodePoints(1); } 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.CONTRACTION_TAG: { 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = Collation.indexFromCE32(ce32); 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int defaultCE32 = d.getCE32FromContexts(index); // Default if no suffix match. 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!forward) { 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Backward contractions are handled by previousCEUnsafe(). 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c has contractions but they were not found. 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = defaultCE32; 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextCp; 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped == null && numCpFwd < 0) { 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Some portion of nextCE32FromContraction() pulled out here as an ASCII fast path, 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // avoiding the function call and the nextSkippedCodePoint() overhead. 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextCp = nextCodePoint(); 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextCp < 0) { 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No more text. 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = defaultCE32; 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if((ce32 & Collation.CONTRACT_NEXT_CCC) != 0 && 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller !CollationFCD.mayHaveLccc(nextCp)) { 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All contraction suffixes start with characters with lccc!=0 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but the next code point has lccc==0. 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumCodePoints(1); 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = defaultCE32; 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextCp = nextSkippedCodePoint(); 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextCp < 0) { 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No more text. 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = defaultCE32; 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if((ce32 & Collation.CONTRACT_NEXT_CCC) != 0 && 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller !CollationFCD.mayHaveLccc(nextCp)) { 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All contraction suffixes start with characters with lccc!=0 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but the next code point has lccc==0. 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumSkipped(1); 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = defaultCE32; 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = nextCE32FromContraction(d, ce32, d.contexts, index + 2, defaultCE32, nextCp); 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.NO_CE32) { 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // CEs from a discontiguous contraction plus the skipped combining marks 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // have been appended already. 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.DIGIT_TAG: 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isNumeric) { 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendNumericCEs(ce32, forward); 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fetch the non-numeric-collation CE32 and continue. 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.ce32s[Collation.indexFromCE32(ce32)]; 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.U0000_TAG: 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(c == 0); 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // NUL-terminated input not supported in Java. 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fetch the normal ce32 for U+0000 and continue. 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.ce32s[0]; 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.HANGUL_TAG: { 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] jamoCE32s = d.jamoCE32s; 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c -= Hangul.HANGUL_BASE; 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int t = c % Hangul.JAMO_T_COUNT; 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c /= Hangul.JAMO_T_COUNT; 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int v = c % Hangul.JAMO_V_COUNT; 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c /= Hangul.JAMO_V_COUNT; 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((ce32 & Collation.HANGUL_NO_SPECIAL_JAMO) != 0) { 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // None of the Jamo CE32s are isSpecialCE32(). 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Avoid recursive function calls and per-Jamo tests. 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.ensureAppendCapacity(t == 0 ? 2 : 3); 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.set(ceBuffer.length, Collation.ceFromCE32(jamoCE32s[c])); 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.set(ceBuffer.length + 1, Collation.ceFromCE32(jamoCE32s[19 + v])); 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.length += 2; 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(t != 0) { 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.appendUnsafe(Collation.ceFromCE32(jamoCE32s[39 + t])); 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We should not need to compute each Jamo code point. 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In particular, there should be no offset or implicit ce32. 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendCEsFromCE32(d, Collation.SENTINEL_CP, jamoCE32s[c], forward); 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendCEsFromCE32(d, Collation.SENTINEL_CP, jamoCE32s[19 + v], forward); 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(t == 0) { return; } 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // offset 39 = 19 + 21 - 1: 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 19 = JAMO_L_COUNT 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 21 = JAMO_T_COUNT 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // -1 = omit t==0 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = jamoCE32s[39 + t]; 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = Collation.SENTINEL_CP; 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.LEAD_SURROGATE_TAG: { 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(forward); // Backward iteration should never see lead surrogate code _unit_ data. 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(isLeadSurrogate(c)); 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char trail; 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Character.isLowSurrogate(trail = handleGetTrailSurrogate())) { 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = Character.toCodePoint((char)c, trail); 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 &= Collation.LEAD_TYPE_MASK; 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.LEAD_ALL_UNASSIGNED) { 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = Collation.UNASSIGNED_CE32; // unassigned-implicit 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(ce32 == Collation.LEAD_ALL_FALLBACK || 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (ce32 = d.getCE32FromSupplementary(c)) == Collation.FALLBACK_CE32) { 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fall back to the base data 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = d.base; 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.getCE32FromSupplementary(c); 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is an unpaired surrogate. 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = Collation.UNASSIGNED_CE32; 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.OFFSET_TAG: 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(c >= 0); 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(d.getCEFromOffsetCE32(c, ce32)); 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.IMPLICIT_TAG: 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(c >= 0); 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isSurrogate(c) && forbidSurrogateCodePoints()) { 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = Collation.FFFD_CE32; 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.unassignedCEFromCodePoint(c)); 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.ceFromSimpleCE32(ce32)); 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Propose widening the UTF16 method. 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final boolean isSurrogate(int c) { 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (c & 0xfffff800) == 0xd800; 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Propose widening the UTF16 method. 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected static final boolean isLeadSurrogate(int c) { 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (c & 0xfffffc00) == 0xd800; 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Propose widening the UTF16 method. 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected static final boolean isTrailSurrogate(int c) { 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (c & 0xfffffc00) == 0xdc00; 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Main lookup trie of the data object. 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected final Trie2_32 trie; 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected final CollationData data; 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final long nextCEFromCE32(CollationData d, int c, int ce32) { 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --ceBuffer.length; // Undo ceBuffer.incLength(). 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendCEsFromCE32(d, c, ce32, true); 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.get(cesIndex++); 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int getCE32FromPrefix(CollationData d, int ce32) { 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = Collation.indexFromCE32(ce32); 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.getCE32FromContexts(index); // Default if no prefix match. 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index += 2; 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Number of code points read before the original code point. 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lookBehind = 0; 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CharsTrie prefixes = new CharsTrie(d.contexts, index); 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = previousCodePoint(); 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c < 0) { break; } 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++lookBehind; 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller BytesTrie.Result match = prefixes.nextForCodePoint(c); 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(match.hasValue()) { 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = prefixes.getValue(); 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!match.hasNext()) { break; } 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardNumCodePoints(lookBehind); 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int nextSkippedCodePoint() { 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped != null && skipped.hasNext()) { return skipped.next(); } 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(numCpFwd == 0) { return Collation.SENTINEL_CP; } 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = nextCodePoint(); 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped != null && !skipped.isEmpty() && c >= 0) { skipped.incBeyond(); } 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(numCpFwd > 0 && c >= 0) { --numCpFwd; } 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c; 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final void backwardNumSkipped(int n) { 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped != null && !skipped.isEmpty()) { 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = skipped.backwardNumCodePoints(n); 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumCodePoints(n); 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(numCpFwd >= 0) { numCpFwd += n; } 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int nextCE32FromContraction( 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationData d, int contractionCE32, 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CharSequence trieChars, int trieOffset, int ce32, int c) { 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c: next code point after the original one 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Number of code points read beyond the original code point. 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Needed for discontiguous contraction matching. 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lookAhead = 1; 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Number of code points read since the last match (initially only c). 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sinceMatch = 1; 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Normally we only need a contiguous match, 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and therefore need not remember the suffixes state from before a mismatch for retrying. 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If we are already processing skipped combining marks, then we do track the state. 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CharsTrie suffixes = new CharsTrie(trieChars, trieOffset); 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped != null && !skipped.isEmpty()) { skipped.saveTrieState(suffixes); } 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller BytesTrie.Result match = suffixes.firstForCodePoint(c); 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextCp; 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(match.hasValue()) { 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = suffixes.getValue(); 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!match.hasNext() || (c = nextSkippedCodePoint()) < 0) { 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped != null && !skipped.isEmpty()) { skipped.saveTrieState(suffixes); } 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sinceMatch = 1; 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(match == BytesTrie.Result.NO_MATCH || (nextCp = nextSkippedCodePoint()) < 0) { 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No match for c, or partial match (BytesTrie.Result.NO_VALUE) and no further text. 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Back up if necessary, and try a discontiguous contraction. 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((contractionCE32 & Collation.CONTRACT_TRAILING_CCC) != 0 && 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Discontiguous contraction matching extends an existing match. 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If there is no match yet, then there is nothing to do. 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((contractionCE32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) == 0 || 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sinceMatch < lookAhead)) { 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The last character of at least one suffix has lccc!=0, 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // allowing for discontiguous contractions. 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UCA S2.1.1 only processes non-starters immediately following 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // "a match in the table" (sinceMatch=1). 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sinceMatch > 1) { 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Return to the state after the last match. 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (Return to sinceMatch=0 and re-fetch the first partially-matched character.) 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumSkipped(sinceMatch); 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = nextSkippedCodePoint(); 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lookAhead -= sinceMatch - 1; 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sinceMatch = 1; 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(d.getFCD16(c) > 0xff) { 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return nextCE32FromDiscontiguousContraction( 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d, suffixes, ce32, lookAhead, c); 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Continue after partial match (BytesTrie.Result.NO_VALUE) for c. 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It does not have a result value, therefore it is not itself "a match in the table". 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If a partially-matched c has ccc!=0 then 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // it might be skipped in discontiguous contraction. 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = nextCp; 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++sinceMatch; 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++lookAhead; 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller match = suffixes.nextForCodePoint(c); 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumSkipped(sinceMatch); 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int nextCE32FromDiscontiguousContraction( 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationData d, CharsTrie suffixes, int ce32, 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lookAhead, int c) { 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UCA section 3.3.2 Contractions: 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Contractions that end with non-starter characters 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // are known as discontiguous contractions. 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ... discontiguous contractions must be detected in input text 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // whenever the final sequence of non-starter characters could be rearranged 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so as to make a contiguous matching sequence that is canonically equivalent. 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UCA: http://www.unicode.org/reports/tr10/#S2.1 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // S2.1 Find the longest initial substring S at each point that has a match in the table. 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // S2.1.1 If there are any non-starters following S, process each non-starter C. 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // S2.1.2 If C is not blocked from S, find if S + C has a match in the table. 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: A non-starter in a string is called blocked 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if there is another non-starter of the same canonical combining class or zero 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // between it and the last character of canonical combining class 0. 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // S2.1.3 If there is a match, replace S by S + C, and remove C. 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // First: Is a discontiguous contraction even possible? 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fcd16 = d.getFCD16(c); 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(fcd16 > 0xff); // The caller checked this already, as a shortcut. 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextCp = nextSkippedCodePoint(); 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextCp < 0) { 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No further text. 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumSkipped(1); 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++lookAhead; 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevCC = fcd16 & 0xff; 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fcd16 = d.getFCD16(nextCp); 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(fcd16 <= 0xff) { 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The next code point after c is a starter (S2.1.1 "process each non-starter"). 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumSkipped(2); 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have read and matched (lookAhead-2) code points, 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // read non-matching c and peeked ahead at nextCp. 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Return to the state before the mismatch and continue matching with nextCp. 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped == null || skipped.isEmpty()) { 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(skipped == null) { 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped = new SkippedState(); 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller suffixes.reset(); 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lookAhead > 2) { 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Replay the partial match so far. 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumCodePoints(lookAhead); 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller suffixes.firstForCodePoint(nextCodePoint()); 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 3; i < lookAhead; ++i) { 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller suffixes.nextForCodePoint(nextCodePoint()); 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip c (which did not match) and nextCp (which we will try now). 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardNumCodePoints(2); 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.saveTrieState(suffixes); 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reset to the trie state before the failed match of c. 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.resetToTrieState(suffixes); 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.setFirstSkipped(c); 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Number of code points read since the last match (at this point: c and nextCp). 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sinceMatch = 2; 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = nextCp; 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller BytesTrie.Result match; 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // "If C is not blocked from S, find if S + C has a match in the table." (S2.1.2) 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prevCC < (fcd16 >> 8) && (match = suffixes.nextForCodePoint(c)).hasValue()) { 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // "If there is a match, replace S by S + C, and remove C." (S2.1.3) 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Keep prevCC unchanged. 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = suffixes.getValue(); 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sinceMatch = 0; 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.recordMatch(); 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!match.hasNext()) { break; } 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.saveTrieState(suffixes); 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No match for "S + C", skip C. 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.skip(c); 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.resetToTrieState(suffixes); 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC = fcd16 & 0xff; 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((c = nextSkippedCodePoint()) < 0) { break; } 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++sinceMatch; 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fcd16 = d.getFCD16(c); 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(fcd16 <= 0xff) { 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The next code point after c is a starter (S2.1.1 "process each non-starter"). 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumSkipped(sinceMatch); 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isTopDiscontiguous = skipped.isEmpty(); 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.replaceMatch(); 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTopDiscontiguous && !skipped.isEmpty()) { 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We did get a match after skipping one or more combining marks, 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and we are not in a recursive discontiguous contraction. 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Append CEs from the contraction ce32 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and then from the combining marks that we skipped before the match. 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = Collation.SENTINEL_CP; 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendCEsFromCE32(d, c, ce32, true); 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fetch CE32s for skipped combining marks from the normal data, with fallback, 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // rather than from the CollationData where we found the contraction. 8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!skipped.hasNext()) { break; } 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = skipped.next(); 8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = getDataCE32(c); 8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.FALLBACK_CE32) { 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = data.base; 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.getCE32(c); 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = data; 8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: A nested discontiguous-contraction match 8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // replaces consumed combining marks with newly skipped ones 8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and resets the reading position to the beginning. 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skipped.clear(); 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = Collation.NO_CE32; // Signal to the caller that the result is in the ceBuffer. 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the previous CE when data.isUnsafeBackward(c, isNumeric). 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final long previousCEUnsafe(int c, UVector32 offsets) { 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We just move through the input counting safe and unsafe code points 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // without collecting the unsafe-backward substring into a buffer and 9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // switching to it. 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This is to keep the logic simple. Otherwise we would have to handle 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // prefix matching going before the backward buffer, switching 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to iteration and back, etc. 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In the most important case of iterating over a normal string, 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // reading from the string itself is already maximally fast. 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The only drawback there is that after getting the CEs we always 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // skip backward to the safe character rather than switching out 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // of a backwardBuffer. 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // But this should not be the common case for previousCE(), 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and correctness and maintainability are more important than 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // complex optimizations. 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the first safe character before c. 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numBackward = 1; 9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while((c = previousCodePoint()) >= 0) { 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++numBackward; 9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!data.isUnsafeBackward(c, isNumeric)) { 9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set the forward iteration limit. 9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: This counts code points. 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We cannot enforce a limit in the middle of a surrogate pair or similar. 9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numCpFwd = numBackward; 9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reset the forward iterator. 9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesIndex = 0; 9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(ceBuffer.length == 0); 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Go forward and collect the CEs. 9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int offset = getOffset(); 9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(numCpFwd > 0) { 9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // nextCE() normally reads one code point. 9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Contraction matching and digit specials read more and check numCpFwd. 9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --numCpFwd; 9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Append one or more CEs to the ceBuffer. 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextCE(); 9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(ceBuffer.get(ceBuffer.length - 1) != Collation.NO_CE); 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No need to loop for getting each expansion CE from nextCE(). 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesIndex = ceBuffer.length; 9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // However, we need to write an offset for each CE. 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This is for CollationElementIterator.getOffset() to return 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // intermediate offsets from the unsafe-backwards segment. 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(offsets.size() < ceBuffer.length); 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offsets.addElement(offset); 9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // For an expansion, the offset of each non-initial CE is the limit offset, 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // consistent with forward iteration. 9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset = getOffset(); 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(offsets.size() < ceBuffer.length) { 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offsets.addElement(offset); 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(offsets.size() == ceBuffer.length); 9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // End offset corresponding to just after the unsafe-backwards segment. 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offsets.addElement(offset); 9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reset the forward iteration limit 9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and move backward to before the segment for which we fetched CEs. 9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numCpFwd = -1; 9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumCodePoints(numBackward); 9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Use the collected CEs and return the last one. 9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesIndex = 0; // Avoid cesIndex > ceBuffer.length when that gets decremented. 9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ceBuffer.get(--ceBuffer.length); 9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Turns a string of digits (bytes 0..9) 9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * into a sequence of CEs that will sort in numeric order. 9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Starts from this ce32's digit value and consumes the following/preceding digits. 9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The digits string must not be empty and must not have leading zeros. 9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final void appendNumericCEs(int ce32, boolean forward) { 9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Collect digits. 9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Use some kind of a byte buffer? We only store values 0..9. 9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder digits = new StringBuilder(); 9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(forward) { 9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char digit = Collation.digitFromCE32(ce32); 9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller digits.append(digit); 9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(numCpFwd == 0) { break; } 9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = nextCodePoint(); 9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c < 0) { break; } 9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = data.getCE32(c); 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.FALLBACK_CE32) { 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = data.base.getCE32(c); 9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!Collation.hasCE32Tag(ce32, Collation.DIGIT_TAG)) { 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backwardNumCodePoints(1); 9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(numCpFwd > 0) { --numCpFwd; } 9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char digit = Collation.digitFromCE32(ce32); 9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller digits.append(digit); 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = previousCodePoint(); 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c < 0) { break; } 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = data.getCE32(c); 9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.FALLBACK_CE32) { 10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = data.base.getCE32(c); 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!Collation.hasCE32Tag(ce32, Collation.DIGIT_TAG)) { 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardNumCodePoints(1); 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reverse the digit string. 10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller digits.reverse(); 10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pos = 0; 10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip leading zeros. 10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(pos < (digits.length() - 1) && digits.charAt(pos) == 0) { ++pos; } 10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Write a sequence of CEs for at most 254 digits at a time. 10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int segmentLength = digits.length() - pos; 10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(segmentLength > 254) { segmentLength = 254; } 10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendNumericSegmentCEs(digits.subSequence(pos, pos + segmentLength)); 10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos += segmentLength; 10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while(pos < digits.length()); 10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Turns 1..254 digits into a sequence of CEs. 10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Called by appendNumericCEs() for each segment of at most 254 digits. 10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final void appendNumericSegmentCEs(CharSequence digits) { 10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = digits.length(); 10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(1 <= length && length <= 254); 10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(length == 1 || digits.charAt(0) != 0); 10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long numericPrimary = data.numericPrimary; 10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: We use primary byte values 2..255: digits are not compressible. 10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length <= 7) { 10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Very dense encoding for small numbers. 10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = digits.charAt(0); 10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 1; i < length; ++i) { 10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = value * 10 + digits.charAt(i); 10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Primary weight second byte values: 10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 74 byte values 2.. 75 for small numbers in two-byte primary weights. 10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 40 byte values 76..115 for medium numbers in three-byte primary weights. 10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 16 byte values 116..131 for large numbers in four-byte primary weights. 10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 124 byte values 132..255 for very large numbers with 4..127 digit pairs. 10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstByte = 2; 10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numBytes = 74; 10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value < numBytes) { 10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Two-byte primary for 0..73, good for day & month numbers etc. 10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long primary = numericPrimary | ((firstByte + value) << 16); 10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.makeCE(primary)); 10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value -= numBytes; 10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller firstByte += numBytes; 10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numBytes = 40; 10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value < numBytes * 254) { 10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Three-byte primary for 74..10233=74+40*254-1, good for year numbers and more. 10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long primary = numericPrimary | 10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((firstByte + value / 254) << 16) | ((2 + value % 254) << 8); 10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.makeCE(primary)); 10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value -= numBytes * 254; 10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller firstByte += numBytes; 10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numBytes = 16; 10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value < numBytes * 254 * 254) { 10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Four-byte primary for 10234..1042489=10234+16*254*254-1. 10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long primary = numericPrimary | (2 + value % 254); 10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value /= 254; 10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller primary |= (2 + value % 254) << 8; 10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value /= 254; 10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller primary |= (firstByte + value % 254) << 16; 10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.makeCE(primary)); 10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // original value > 1042489 10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(length >= 7); 10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The second primary byte value 132..255 indicates the number of digit pairs (4..127), 10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // then we generate primary bytes with those pairs. 10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Omit trailing 00 pairs. 10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Decrement the value for the last pair. 10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set the exponent. 4 pairs.132, 5 pairs.133, ..., 127 pairs.255. 10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numPairs = (length + 1) / 2; 10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long primary = numericPrimary | ((132 - 4 + numPairs) << 16); 10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the length without trailing 00 pairs. 10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(digits.charAt(length - 1) == 0 && digits.charAt(length - 2) == 0) { 10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length -= 2; 10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Read the first pair. 10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pair; 10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pos; 10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((length & 1) != 0) { 10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Only "half a pair" if we have an odd number of digits. 10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pair = digits.charAt(0); 10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = 1; 10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pair = digits.charAt(0) * 10 + digits.charAt(1); 10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = 2; 11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pair = 11 + 2 * pair; 11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Add the pairs of digits between pos and length. 11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int shift = 8; 11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(pos < length) { 11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(shift == 0) { 11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Every three pairs/bytes we need to store a 4-byte-primary CE 11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and start with a new CE with the '0' primary lead byte. 11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller primary |= pair; 11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.makeCE(primary)); 11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller primary = numericPrimary; 11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller shift = 16; 11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller primary |= pair << shift; 11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller shift -= 8; 11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pair = 11 + 2 * (digits.charAt(pos) * 10 + digits.charAt(pos + 1)); 11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos += 2; 11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller primary |= (pair - 1) << shift; 11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ceBuffer.append(Collation.makeCE(primary)); 11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private CEBuffer ceBuffer; 11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int cesIndex; 11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private SkippedState skipped; 11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Number of code points to read forward, or -1. 11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Used as a forward iteration limit in previousCEUnsafe(). 11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int numCpFwd; 11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Numeric collation (CollationSettings.NUMERIC). 11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isNumeric; 11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 1134