12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller****************************************************************************** 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Copyright (C) 1996-2010, International Business Machines Corporation and * 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* others. All Rights Reserved. * 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller****************************************************************************** 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/ 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl; 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.DataOutputStream; 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException; 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.OutputStream; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Arrays; 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter; 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/** 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Builder class to manipulate and generate a trie. 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is useful for ICU data in primitive types. 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Provides a compact way to store information that is indexed by Unicode 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * values, such as character properties, types, keyboard values, etc. This is 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * very useful when you have a block of Unicode data that contains significant 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * values while the rest of the Unicode data is unused in the application or 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * when you have a lot of redundance, such as where all 21,000 Han ideographs 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * have the same value. However, lookup is much faster than a hash table. 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A trie of any primitive data type serves two purposes: 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <UL type = round> 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <LI>Fast access of the indexed values. 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <LI>Smaller memory footprint. 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </UL> 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is a direct port from the ICU4C version 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @author Syn Wee Quek 37836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic class IntTrieBuilder extends TrieBuilder 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller{ 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // public constructor ---------------------------------------------- 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copy constructor 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public IntTrieBuilder(IntTrieBuilder table) 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller super(table); 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_data_ = new int[m_dataCapacity_]; 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_); 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_initialValue_ = table.m_initialValue_; 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_leadUnitValue_ = table.m_leadUnitValue_; 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructs a build table 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param aliasdata data to be filled into table 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param maxdatalength maximum data length allowed in table 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param initialvalue inital data value 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param latin1linear is latin 1 to be linear 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public IntTrieBuilder(int aliasdata[], int maxdatalength, 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int initialvalue, int leadunitvalue, 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean latin1linear) 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller super(); 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && maxdatalength < 1024)) { 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException( 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "Argument maxdatalength is too small"); 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (aliasdata != null) { 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_data_ = aliasdata; 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_data_ = new int[maxdatalength]; 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // preallocate and reset the first data block (block index 0) 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int j = DATA_BLOCK_LENGTH; 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (latin1linear) { 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // preallocate and reset the first block (number 0) and Latin-1 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (U+0000..U+00ff) after that made sure above that 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // maxDataLength >= 1024 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // set indexes to point to consecutive data blocks 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0; 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // do this at least for trie->index[0] even if that block is 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // only partly used for Latin-1 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_index_[i ++] = j; 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller j += DATA_BLOCK_LENGTH; 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while (i < (256 >> SHIFT_)); 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_dataLength_ = j; 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // reset the initially allocated blocks to the initial value 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Arrays.fill(m_data_, 0, m_dataLength_, initialvalue); 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_initialValue_ = initialvalue; 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_leadUnitValue_ = leadunitvalue; 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_dataCapacity_ = maxdatalength; 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_isLatin1Linear_ = latin1linear; 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_isCompacted_ = false; 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // public methods ------------------------------------------------------- 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*public final void print() 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0; 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int oldvalue = m_index_[i]; 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = 0; 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("index length " + m_indexLength_ 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + " --------------------------"); 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (i < m_indexLength_) { 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_index_[i] != oldvalue) { 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("index has " + count + " counts of " 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + Integer.toHexString(oldvalue)); 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count = 0; 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldvalue = m_index_[i]; 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count ++; 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i ++; 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("index has " + count + " counts of " 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + Integer.toHexString(oldvalue)); 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = 0; 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldvalue = m_data_[i]; 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count = 0; 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("data length " + m_dataLength_ 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + " --------------------------"); 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (i < m_dataLength_) { 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_data_[i] != oldvalue) { 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((oldvalue & 0xf1000000) == 0xf1000000) { 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int temp = oldvalue & 0xffffff; 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller temp += 0x320; 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldvalue = 0xf1000000 | temp; 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((oldvalue & 0xf2000000) == 0xf2000000) { 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int temp = oldvalue & 0xffffff; 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller temp += 0x14a; 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldvalue = 0xf2000000 | temp; 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("data has " + count + " counts of " 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + Integer.toHexString(oldvalue)); 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count = 0; 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldvalue = m_data_[i]; 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count ++; 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i ++; 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((oldvalue & 0xf1000000) == 0xf1000000) { 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int temp = oldvalue & 0xffffff; 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller temp += 0x320; 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldvalue = 0xf1000000 | temp; 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((oldvalue & 0xf2000000) == 0xf2000000) { 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int temp = oldvalue & 0xffffff; 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller temp += 0x14a; 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oldvalue = 0xf2000000 | temp; 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("data has " + count + " counts of " 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + Integer.toHexString(oldvalue)); 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Gets a 32 bit data from the table data 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param ch codepoint which data is to be retrieved 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the 32 bit data 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getValue(int ch) 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // valid, uncompacted trie and valid c? 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int block = m_index_[ch >> SHIFT_]; 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return m_data_[Math.abs(block) + (ch & MASK_)]; 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Get a 32 bit data from the table data 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param ch code point for which data is to be retrieved. 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param inBlockZero Output parameter, inBlockZero[0] returns true if the 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * char maps into block zero, otherwise false. 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the 32 bit data value. 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getValue(int ch, boolean [] inBlockZero) 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // valid, uncompacted trie and valid c? 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (inBlockZero != null) { 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller inBlockZero[0] = true; 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int block = m_index_[ch >> SHIFT_]; 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (inBlockZero != null) { 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller inBlockZero[0] = (block == 0); 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return m_data_[Math.abs(block) + (ch & MASK_)]; 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets a 32 bit data in the table data 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param ch codepoint which data is to be set 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param value to set 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the set is successful, otherwise 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if the table has been compacted return false 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean setValue(int ch, int value) 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // valid, uncompacted trie and valid c? 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int block = getDataBlock(ch); 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (block < 0) { 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_data_[block + (ch & MASK_)] = value; 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Serializes the build table with 32 bit data 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param datamanipulate builder raw fold method implementation 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param triedatamanipulate result trie fold method 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a new trie 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate, 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Trie.DataManipulate triedatamanipulate) 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (datamanipulate == null) { 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Parameters can not be null"); 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fold and compact if necessary, also checks that indexLength is 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // within limits 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!m_isCompacted_) { 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compact once without overlap to improve folding 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compact(false); 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fold the supplementary part of the index array 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fold(datamanipulate); 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compact again with overlap for minimum data array length 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compact(true); 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_isCompacted_ = true; 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is dataLength within limits? 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_dataLength_ >= MAX_DATA_LENGTH_) { 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ArrayIndexOutOfBoundsException("Data length too small"); 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char index[] = new char[m_indexLength_]; 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int data[] = new int[m_dataLength_]; 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write the index (stage 1) array and the 32-bit data (stage 2) array 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write 16-bit index values shifted right by INDEX_SHIFT_ 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < m_indexLength_; i ++) { 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_); 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write 32-bit data values 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(m_data_, 0, data, 0, m_dataLength_); 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_); 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller options |= OPTIONS_DATA_IS_32_BIT_; 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_isLatin1Linear_) { 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller options |= OPTIONS_LATIN1_IS_LINEAR_; 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return new IntTrie(index, data, m_initialValue_, options, 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller triedatamanipulate); 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Serializes the build table to an output stream. 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compacts the build-time trie after all values are set, and then 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * writes the serialized form onto an output stream. 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * After this, this build-time Trie can only be serialized again and/or closed; 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * no further values can be added. 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This function is the rough equivalent of utrie_seriaize() in ICU4C. 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param os the output stream to which the seriaized trie will be written. 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If nul, the function still returns the size of the serialized Trie. 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param reduceTo16Bits If true, reduce the data size to 16 bits. The resulting 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * serialized form can then be used to create a CharTrie. 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param datamanipulate builder raw fold method implementation 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the number of bytes written to the output stream. 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int serialize(OutputStream os, boolean reduceTo16Bits, 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller TrieBuilder.DataManipulate datamanipulate) throws IOException { 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (datamanipulate == null) { 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Parameters can not be null"); 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fold and compact if necessary, also checks that indexLength is 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // within limits 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!m_isCompacted_) { 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compact once without overlap to improve folding 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compact(false); 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fold the supplementary part of the index array 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fold(datamanipulate); 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compact again with overlap for minimum data array length 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compact(true); 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_isCompacted_ = true; 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is dataLength within limits? 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length; 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (reduceTo16Bits) { 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length = m_dataLength_ + m_indexLength_; 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length = m_dataLength_; 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (length >= MAX_DATA_LENGTH_) { 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ArrayIndexOutOfBoundsException("Data length too small"); 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // struct UTrieHeader { 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // int32_t signature; 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // int32_t options (a bit field) 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // int32_t indexLength 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // int32_t dataLength 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length = Trie.HEADER_LENGTH_ + 2*m_indexLength_; 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(reduceTo16Bits) { 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length+=2*m_dataLength_; 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length+=4*m_dataLength_; 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (os == null) { 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No output stream. Just return the length of the serialized Trie, in bytes. 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return length; 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller DataOutputStream dos = new DataOutputStream(os); 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dos.writeInt(Trie.HEADER_SIGNATURE_); 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_); 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!reduceTo16Bits) { 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_; 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(m_isLatin1Linear_) { 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_; 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dos.writeInt(options); 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dos.writeInt(m_indexLength_); 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dos.writeInt(m_dataLength_); 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */ 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(reduceTo16Bits) { 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */ 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<m_indexLength_; i++) { 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_; 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dos.writeChar(v); 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* write 16-bit data values */ 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i=0; i<m_dataLength_; i++) { 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int v = m_data_[i] & 0x0000ffff; 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dos.writeChar(v); 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */ 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<m_indexLength_; i++) { 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_; 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dos.writeChar(v); 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* write 32-bit data values */ 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i=0; i<m_dataLength_; i++) { 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dos.writeInt(m_data_[i]); 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return length; 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Set a value in a range of code points [start..limit]. 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * All code points c with start <= c < limit will get the value if 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * overwrite is true or if the old value is 0. 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start the first code point to get the value 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param limit one past the last code point to get the value 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param value the value 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param overwrite flag for whether old non-initial values are to be 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * overwritten 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return false if a failure occurred (illegal argument or data array 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * overrun) 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean setRange(int start, int limit, int value, 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean overwrite) 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // repeat value in [start..limit[ 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // mark index values for repeat-data blocks by setting bit 31 of the 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // index values fill around existing values if any, if(overwrite) 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // valid, uncompacted trie and valid indexes? 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_isCompacted_ || start < UCharacter.MIN_VALUE 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller || limit > (UCharacter.MAX_VALUE + 1) || start > limit) { 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start == limit) { 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; // nothing to do 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((start & MASK_) != 0) { 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // set partial block at [start..following block boundary[ 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int block = getDataBlock(start); 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (block < 0) { 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_; 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (nextStart <= limit) { 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH, 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value, overwrite); 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = nextStart; 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fillBlock(block, start & MASK_, limit & MASK_, 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value, overwrite); 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // number of positions in the last, partial block 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int rest = limit & MASK_; 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // round down limit to a block boundary 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit &= ~MASK_; 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // iterate over all-value blocks 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int repeatBlock = 0; 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (value == m_initialValue_) { 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // repeatBlock = 0; assigned above 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller repeatBlock = -1; 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (start < limit) { 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get index value 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int block = m_index_[start >> SHIFT_]; 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (block > 0) { 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // already allocated, fill in value 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite); 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (m_data_[-block] != value && (block == 0 || overwrite)) { 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // set the repeatBlock instead of the current block 0 or range 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // block 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (repeatBlock >= 0) { 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_index_[start >> SHIFT_] = -repeatBlock; 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // create and set and fill the repeatBlock 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller repeatBlock = getDataBlock(start); 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (repeatBlock < 0) { 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // set the negative block number to indicate that it is a 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // repeat block 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_index_[start >> SHIFT_] = -repeatBlock; 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true); 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start += DATA_BLOCK_LENGTH; 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (rest > 0) { 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // set partial block at [last block boundary..limit[ 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int block = getDataBlock(start); 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (block < 0) { 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fillBlock(block, 0, rest, value, overwrite); 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // protected data member ------------------------------------------------ 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected int m_data_[]; 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected int m_initialValue_; 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private data member ------------------------------------------------ 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int m_leadUnitValue_; 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private methods ------------------------------------------------------ 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int allocDataBlock() 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newBlock = m_dataLength_; 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newTop = newBlock + DATA_BLOCK_LENGTH; 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (newTop > m_dataCapacity_) { 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // out of memory in the data array 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_dataLength_ = newTop; 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return newBlock; 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * No error checking for illegal arguments. 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param ch codepoint to look for 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return -1 if no new data block available (out of memory in data array) 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getDataBlock(int ch) 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ch >>= SHIFT_; 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int indexValue = m_index_[ch]; 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (indexValue > 0) { 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return indexValue; 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // allocate a new data block 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newBlock = allocDataBlock(); 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (newBlock < 0) { 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // out of memory in the data array 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_index_[ch] = newBlock; 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // copy-on-write for a block from a setRange() 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock, 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller DATA_BLOCK_LENGTH << 2); 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return newBlock; 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compact a folded build-time trie. 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The compaction 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - removes blocks that are identical with earlier ones 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - overlaps adjacent blocks as much as possible (if overlap == true) 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - moves blocks in steps of the data granularity 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - moves and overlaps blocks that overlap with multiple values in the overlap region 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It does not 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - try to move and overlap blocks that are not already adjacent 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param overlap flag 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void compact(boolean overlap) 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_isCompacted_) { 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; // nothing left to do 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compaction 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // initialize the index map with "block is used/unused" flags 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller findUnusedBlocks(); 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if Latin-1 is preallocated and linear, then do not compact Latin-1 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // data 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int overlapStart = DATA_BLOCK_LENGTH; 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_isLatin1Linear_ && SHIFT_ <= 8) { 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller overlapStart += 256; 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newStart = DATA_BLOCK_LENGTH; 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int start = newStart; start < m_dataLength_;) { 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // start: index of first entry of current block 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // newStart: index where the current block is to be moved 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (right after current end of already-compacted data) 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // skip blocks that are not used 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_map_[start >>> SHIFT_] < 0) { 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // advance start to the next block 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start += DATA_BLOCK_LENGTH; 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // leave newStart with the previous block! 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // search for an identical block 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start >= overlapStart) { 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = findSameDataBlock(m_data_, newStart, start, 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH); 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i >= 0) { 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // found an identical block, set the other block's index 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // value for the current block 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_map_[start >>> SHIFT_] = i; 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // advance start to the next block 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start += DATA_BLOCK_LENGTH; 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // leave newStart with the previous block! 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // see if the beginning of this block can be overlapped with the 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // end of the previous block 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(overlap && start>=overlapStart) { 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_; 6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i>0 && !equal_int(m_data_, newStart-i, start, i); 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i-=DATA_GRANULARITY_) {} 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i=0; 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i > 0) { 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // some overlap 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_map_[start >>> SHIFT_] = newStart - i; 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // move the non-overlapping indexes to their new positions 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start += i; 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) { 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_data_[newStart ++] = m_data_[start ++]; 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (newStart < start) { 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // no overlap, just move the indexes to their new positions 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_map_[start >>> SHIFT_] = newStart; 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i = DATA_BLOCK_LENGTH; i > 0; -- i) { 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_data_[newStart ++] = m_data_[start ++]; 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { // no overlap && newStart==start 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_map_[start >>> SHIFT_] = start; 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newStart += DATA_BLOCK_LENGTH; 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = newStart; 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // now adjust the index (stage 1) table 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i = 0; i < m_indexLength_; ++ i) { 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_]; 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_dataLength_ = newStart; 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Find the same data block 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param data array 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param dataLength 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param otherBlock 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param step 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int findSameDataBlock(int data[], int dataLength, 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int otherBlock, int step) 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ensure that we do not even partially get past dataLength 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataLength -= DATA_BLOCK_LENGTH; 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int block = 0; block <= dataLength; block += step) { 6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) { 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return block; 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Fold the normalization data for supplementary code points into 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a compact area on top of the BMP-part of the trie index, 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with the lead surrogates indexing this compact area. 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Duplicate the index values for lead surrogates: 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * From inside the BMP area, where some may be overridden with folded values, 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to just after the BMP area, where they can be retrieved for 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * code point lookups. 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param manipulate fold implementation 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final void fold(DataManipulate manipulate) 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_]; 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index[] = m_index_; 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // copy the lead surrogate indexes into a temporary array 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0, 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SURROGATE_BLOCK_COUNT_); 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // set all values for lead surrogate code *units* to leadUnitValue 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so that by default runtime lookups will find no data for associated 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // supplementary code points, unless there is data for such code points 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // which will result in a non-zero folding value below that is set for 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the respective lead units 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the above saved the indexes for surrogate code *points* 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fill the indexes with simplified code from utrie_setRange32() 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int block = 0; 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_leadUnitValue_ == m_initialValue_) { 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // leadUnitValue == initialValue, use all-initial-value block 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // block = 0; if block here left empty 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // create and fill the repeatBlock 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller block = allocDataBlock(); 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (block < 0) { 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // data table overflow 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalStateException("Internal error: Out of memory space"); 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true); 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // negative block number to indicate that it is a repeat block 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller block = -block; 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) { 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_index_[c] = block; 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fold significant index values into the area just after the BMP 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // indexes. 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In case the first lead surrogate has significant data, 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // its index block must be used first (in which case the folding is a 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // no-op). 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Later all folded index blocks are moved up one to insert the copied 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // lead surrogate indexes. 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int indexLength = BMP_INDEX_LENGTH_; 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // search for any index (stage 1) entries for supplementary code points 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int c = 0x10000; c < 0x110000;) { 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (index[c >> SHIFT_] != 0) { 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // there is data, treat the full block for a lead surrogate 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c &= ~0x3ff; 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is there an identical index block? 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller block = findSameIndexBlock(index, indexLength, c >> SHIFT_); 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get a folded value for [c..c+0x400[ and, 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if different from the value for the lead surrogate code 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // point, set it for the lead surrogate code unit 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = manipulate.getFoldedValue(c, 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller block + SURROGATE_BLOCK_COUNT_); 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (value != getValue(UTF16.getLeadSurrogate(c))) { 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!setValue(UTF16.getLeadSurrogate(c), value)) { 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // data table overflow 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ArrayIndexOutOfBoundsException( 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "Data table overflow"); 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if we did not find an identical index block... 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (block == indexLength) { 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // move the actual index (stage 1) entries from the 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // supplementary position to the new one 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(index, c >> SHIFT_, index, indexLength, 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SURROGATE_BLOCK_COUNT_); 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller indexLength += SURROGATE_BLOCK_COUNT_; 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c += 0x400; 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c += DATA_BLOCK_LENGTH; 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // index array overflow? 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This is to guarantee that a folding offset is of the form 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the index is too large, then n>=1024 and more than 10 bits are 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // necessary. 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In fact, it can only ever become n==1024 with completely unfoldable 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // data and the additional block of duplicated values for lead 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // surrogates. 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (indexLength >= MAX_INDEX_LENGTH_) { 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ArrayIndexOutOfBoundsException("Index table overflow"); 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make space for the lead surrogate index block and insert it between 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the BMP indexes and the folded ones 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(index, BMP_INDEX_LENGTH_, index, 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_, 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller indexLength - BMP_INDEX_LENGTH_); 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_, 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SURROGATE_BLOCK_COUNT_); 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller indexLength += SURROGATE_BLOCK_COUNT_; 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_indexLength_ = indexLength; 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 774836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void fillBlock(int block, int start, int limit, int value, 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean overwrite) 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit += block; 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller block += start; 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (overwrite) { 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (block < limit) { 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_data_[block ++] = value; 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (block < limit) { 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m_data_[block] == m_initialValue_) { 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller m_data_[block] = value; 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++ block; 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 797