12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Copyright (C) 1996-2010, International Business Machines Corporation and   *
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* others. All Rights Reserved.                                               *
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl;
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.DataOutputStream;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.OutputStream;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Arrays;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/**
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Builder class to manipulate and generate a trie.
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is useful for ICU data in primitive types.
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Provides a compact way to store information that is indexed by Unicode
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * values, such as character properties, types, keyboard values, etc. This is
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * very useful when you have a block of Unicode data that contains significant
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * values while the rest of the Unicode data is unused in the application or
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * when you have a lot of redundance, such as where all 21,000 Han ideographs
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * have the same value.  However, lookup is much faster than a hash table.
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A trie of any primitive data type serves two purposes:
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <UL type = round>
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     <LI>Fast access of the indexed values.
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     <LI>Smaller memory footprint.
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </UL>
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is a direct port from the ICU4C version
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @author             Syn Wee Quek
37836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic class IntTrieBuilder extends TrieBuilder
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller{
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // public constructor ----------------------------------------------
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Copy constructor
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public IntTrieBuilder(IntTrieBuilder table)
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        super(table);
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_data_ = new int[m_dataCapacity_];
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_initialValue_ = table.m_initialValue_;
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_leadUnitValue_ = table.m_leadUnitValue_;
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructs a build table
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param aliasdata data to be filled into table
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param maxdatalength maximum data length allowed in table
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param initialvalue inital data value
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param latin1linear is latin 1 to be linear
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public IntTrieBuilder(int aliasdata[], int maxdatalength,
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                          int initialvalue, int leadunitvalue,
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                          boolean latin1linear)
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        super();
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                  && maxdatalength < 1024)) {
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException(
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                               "Argument maxdatalength is too small");
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (aliasdata != null) {
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            m_data_ = aliasdata;
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            m_data_ = new int[maxdatalength];
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // preallocate and reset the first data block (block index 0)
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int j = DATA_BLOCK_LENGTH;
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (latin1linear) {
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // preallocate and reset the first block (number 0) and Latin-1
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // (U+0000..U+00ff) after that made sure above that
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // maxDataLength >= 1024
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // set indexes to point to consecutive data blocks
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int i = 0;
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            do {
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // do this at least for trie->index[0] even if that block is
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // only partly used for Latin-1
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                m_index_[i ++] = j;
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                j += DATA_BLOCK_LENGTH;
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } while (i < (256 >> SHIFT_));
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_dataLength_ = j;
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // reset the initially allocated blocks to the initial value
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_initialValue_ = initialvalue;
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_leadUnitValue_ = leadunitvalue;
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_dataCapacity_ = maxdatalength;
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_isLatin1Linear_ = latin1linear;
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_isCompacted_ = false;
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // public methods -------------------------------------------------------
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*public final void print()
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      {
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      int i = 0;
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      int oldvalue = m_index_[i];
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      int count = 0;
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      System.out.println("index length " + m_indexLength_
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      + " --------------------------");
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      while (i < m_indexLength_) {
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      if (m_index_[i] != oldvalue) {
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      System.out.println("index has " + count + " counts of "
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      + Integer.toHexString(oldvalue));
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      count = 0;
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      oldvalue = m_index_[i];
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      count ++;
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      i ++;
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      System.out.println("index has " + count + " counts of "
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      + Integer.toHexString(oldvalue));
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      i = 0;
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      oldvalue = m_data_[i];
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      count = 0;
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      System.out.println("data length " + m_dataLength_
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      + " --------------------------");
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      while (i < m_dataLength_) {
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      if (m_data_[i] != oldvalue) {
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      if ((oldvalue & 0xf1000000) == 0xf1000000) {
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      int temp = oldvalue & 0xffffff;
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      temp += 0x320;
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      oldvalue = 0xf1000000 | temp;
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      if ((oldvalue & 0xf2000000) == 0xf2000000) {
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      int temp = oldvalue & 0xffffff;
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      temp += 0x14a;
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      oldvalue = 0xf2000000 | temp;
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      System.out.println("data has " + count + " counts of "
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      + Integer.toHexString(oldvalue));
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      count = 0;
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      oldvalue = m_data_[i];
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      count ++;
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      i ++;
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      if ((oldvalue & 0xf1000000) == 0xf1000000) {
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      int temp = oldvalue & 0xffffff;
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      temp += 0x320;
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      oldvalue = 0xf1000000 | temp;
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      if ((oldvalue & 0xf2000000) == 0xf2000000) {
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      int temp = oldvalue & 0xffffff;
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      temp += 0x14a;
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      oldvalue = 0xf2000000 | temp;
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      System.out.println("data has " + count + " counts of "
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      + Integer.toHexString(oldvalue));
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Gets a 32 bit data from the table data
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ch codepoint which data is to be retrieved
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the 32 bit data
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getValue(int ch)
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // valid, uncompacted trie and valid c?
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int block = m_index_[ch >> SHIFT_];
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return m_data_[Math.abs(block) + (ch & MASK_)];
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Get a 32 bit data from the table data
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ch  code point for which data is to be retrieved.
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param inBlockZero  Output parameter, inBlockZero[0] returns true if the
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *                      char maps into block zero, otherwise false.
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the 32 bit data value.
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getValue(int ch, boolean [] inBlockZero)
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // valid, uncompacted trie and valid c?
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (inBlockZero != null) {
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                inBlockZero[0] = true;
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int block = m_index_[ch >> SHIFT_];
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (inBlockZero != null) {
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            inBlockZero[0] = (block == 0);
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return m_data_[Math.abs(block) + (ch & MASK_)];
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets a 32 bit data in the table data
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ch codepoint which data is to be set
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param value to set
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the set is successful, otherwise
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *              if the table has been compacted return false
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean setValue(int ch, int value)
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // valid, uncompacted trie and valid c?
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int block = getDataBlock(ch);
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (block < 0) {
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_data_[block + (ch & MASK_)] = value;
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Serializes the build table with 32 bit data
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param datamanipulate builder raw fold method implementation
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param triedatamanipulate result trie fold method
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a new trie
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                             Trie.DataManipulate triedatamanipulate)
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (datamanipulate == null) {
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Parameters can not be null");
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // fold and compact if necessary, also checks that indexLength is
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // within limits
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!m_isCompacted_) {
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // compact once without overlap to improve folding
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            compact(false);
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // fold the supplementary part of the index array
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fold(datamanipulate);
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // compact again with overlap for minimum data array length
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            compact(true);
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            m_isCompacted_ = true;
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // is dataLength within limits?
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_dataLength_ >= MAX_DATA_LENGTH_) {
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ArrayIndexOutOfBoundsException("Data length too small");
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char index[] = new char[m_indexLength_];
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int data[] = new int[m_dataLength_];
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // write the index (stage 1) array and the 32-bit data (stage 2) array
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // write 16-bit index values shifted right by INDEX_SHIFT_
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < m_indexLength_; i ++) {
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // write 32-bit data values
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        options |= OPTIONS_DATA_IS_32_BIT_;
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_isLatin1Linear_) {
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            options |= OPTIONS_LATIN1_IS_LINEAR_;
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return new IntTrie(index, data, m_initialValue_, options,
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           triedatamanipulate);
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Serializes the build table to an output stream.
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compacts the build-time trie after all values are set, and then
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * writes the serialized form onto an output stream.
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * After this, this build-time Trie can only be serialized again and/or closed;
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * no further values can be added.
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This function is the rough equivalent of utrie_seriaize() in ICU4C.
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param os the output stream to which the seriaized trie will be written.
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         If nul, the function still returns the size of the serialized Trie.
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param reduceTo16Bits If true, reduce the data size to 16 bits.  The resulting
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         serialized form can then be used to create a CharTrie.
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param datamanipulate builder raw fold method implementation
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the number of bytes written to the output stream.
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     public int serialize(OutputStream os, boolean reduceTo16Bits,
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            TrieBuilder.DataManipulate datamanipulate)  throws IOException {
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if (datamanipulate == null) {
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             throw new IllegalArgumentException("Parameters can not be null");
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         }
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         // fold and compact if necessary, also checks that indexLength is
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         // within limits
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if (!m_isCompacted_) {
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             // compact once without overlap to improve folding
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             compact(false);
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             // fold the supplementary part of the index array
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             fold(datamanipulate);
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             // compact again with overlap for minimum data array length
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             compact(true);
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             m_isCompacted_ = true;
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         }
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         // is dataLength within limits?
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         int length;
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if (reduceTo16Bits) {
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             length = m_dataLength_ + m_indexLength_;
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         } else {
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             length = m_dataLength_;
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         }
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if (length >= MAX_DATA_LENGTH_) {
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             throw new ArrayIndexOutOfBoundsException("Data length too small");
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         }
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         //  struct UTrieHeader {
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         //      int32_t   signature;
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         //      int32_t   options  (a bit field)
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         //      int32_t   indexLength
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         //      int32_t   dataLength
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         length = Trie.HEADER_LENGTH_ + 2*m_indexLength_;
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if(reduceTo16Bits) {
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             length+=2*m_dataLength_;
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         } else {
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             length+=4*m_dataLength_;
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         }
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if (os == null) {
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             // No output stream.  Just return the length of the serialized Trie, in bytes.
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             return length;
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         }
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         DataOutputStream dos = new DataOutputStream(os);
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         dos.writeInt(Trie.HEADER_SIGNATURE_);
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_);
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if(!reduceTo16Bits) {
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         }
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if(m_isLatin1Linear_) {
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         }
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         dos.writeInt(options);
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         dos.writeInt(m_indexLength_);
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         dos.writeInt(m_dataLength_);
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         if(reduceTo16Bits) {
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             for (int i=0; i<m_indexLength_; i++) {
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 dos.writeChar(v);
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             }
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             /* write 16-bit data values */
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             for(int i=0; i<m_dataLength_; i++) {
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 int v = m_data_[i] & 0x0000ffff;
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 dos.writeChar(v);
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             }
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         } else {
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             for (int i=0; i<m_indexLength_; i++) {
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 dos.writeChar(v);
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             }
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             /* write 32-bit data values */
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             for(int i=0; i<m_dataLength_; i++) {
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 dos.writeInt(m_data_[i]);
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             }
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         return length;
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Set a value in a range of code points [start..limit].
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * All code points c with start &lt;= c &lt; limit will get the value if
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * overwrite is true or if the old value is 0.
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start the first code point to get the value
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param limit one past the last code point to get the value
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param value the value
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param overwrite flag for whether old non-initial values are to be
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *        overwritten
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return false if a failure occurred (illegal argument or data array
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *               overrun)
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean setRange(int start, int limit, int value,
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            boolean overwrite)
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // repeat value in [start..limit[
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // mark index values for repeat-data blocks by setting bit 31 of the
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // index values fill around existing values if any, if(overwrite)
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // valid, uncompacted trie and valid indexes?
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_isCompacted_ || start < UCharacter.MIN_VALUE
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start == limit) {
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true; // nothing to do
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((start & MASK_) != 0) {
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // set partial block at [start..following block boundary[
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int block = getDataBlock(start);
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (block < 0) {
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (nextStart <= limit) {
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                          value, overwrite);
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start = nextStart;
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fillBlock(block, start & MASK_, limit & MASK_,
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                          value, overwrite);
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // number of positions in the last, partial block
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int rest = limit & MASK_;
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // round down limit to a block boundary
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        limit &= ~MASK_;
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // iterate over all-value blocks
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int repeatBlock = 0;
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (value == m_initialValue_) {
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // repeatBlock = 0; assigned above
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            repeatBlock = -1;
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (start < limit) {
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // get index value
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int block = m_index_[start >> SHIFT_];
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (block > 0) {
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // already allocated, fill in value
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else if (m_data_[-block] != value && (block == 0 || overwrite)) {
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // set the repeatBlock instead of the current block 0 or range
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // block
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (repeatBlock >= 0) {
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    m_index_[start >> SHIFT_] = -repeatBlock;
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else {
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // create and set and fill the repeatBlock
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    repeatBlock = getDataBlock(start);
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (repeatBlock < 0) {
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return false;
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // set the negative block number to indicate that it is a
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // repeat block
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    m_index_[start >> SHIFT_] = -repeatBlock;
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            start += DATA_BLOCK_LENGTH;
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (rest > 0) {
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // set partial block at [last block boundary..limit[
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int block = getDataBlock(start);
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (block < 0) {
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fillBlock(block, 0, rest, value, overwrite);
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // protected data member ------------------------------------------------
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    protected int m_data_[];
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    protected int m_initialValue_;
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  private data member ------------------------------------------------
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int m_leadUnitValue_;
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // private methods ------------------------------------------------------
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int allocDataBlock()
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int newBlock = m_dataLength_;
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int newTop = newBlock + DATA_BLOCK_LENGTH;
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (newTop > m_dataCapacity_) {
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // out of memory in the data array
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_dataLength_ = newTop;
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return newBlock;
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * No error checking for illegal arguments.
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ch codepoint to look for
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return -1 if no new data block available (out of memory in data array)
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getDataBlock(int ch)
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ch >>= SHIFT_;
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int indexValue = m_index_[ch];
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (indexValue > 0) {
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return indexValue;
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // allocate a new data block
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int newBlock = allocDataBlock();
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (newBlock < 0) {
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // out of memory in the data array
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_index_[ch] = newBlock;
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // copy-on-write for a block from a setRange()
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         DATA_BLOCK_LENGTH << 2);
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return newBlock;
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compact a folded build-time trie.
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The compaction
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - removes blocks that are identical with earlier ones
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - overlaps adjacent blocks as much as possible (if overlap == true)
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - moves blocks in steps of the data granularity
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - moves and overlaps blocks that overlap with multiple values in the overlap region
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * It does not
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - try to move and overlap blocks that are not already adjacent
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param overlap flag
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void compact(boolean overlap)
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_isCompacted_) {
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return; // nothing left to do
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // compaction
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // initialize the index map with "block is used/unused" flags
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        findUnusedBlocks();
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // if Latin-1 is preallocated and linear, then do not compact Latin-1
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // data
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int overlapStart = DATA_BLOCK_LENGTH;
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_isLatin1Linear_ && SHIFT_ <= 8) {
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            overlapStart += 256;
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int newStart = DATA_BLOCK_LENGTH;
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int start = newStart; start < m_dataLength_;) {
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // start: index of first entry of current block
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // newStart: index where the current block is to be moved
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //           (right after current end of already-compacted data)
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // skip blocks that are not used
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (m_map_[start >>> SHIFT_] < 0) {
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // advance start to the next block
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start += DATA_BLOCK_LENGTH;
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // leave newStart with the previous block!
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // search for an identical block
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (start >= overlapStart) {
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                i = findSameDataBlock(m_data_, newStart, start,
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                          overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (i >= 0) {
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // found an identical block, set the other block's index
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // value for the current block
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    m_map_[start >>> SHIFT_] = i;
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // advance start to the next block
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    start += DATA_BLOCK_LENGTH;
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // leave newStart with the previous block!
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // see if the beginning of this block can be overlapped with the
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // end of the previous block
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(overlap && start>=overlapStart) {
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    i>0 && !equal_int(m_data_, newStart-i, start, i);
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    i-=DATA_GRANULARITY_) {}
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                i=0;
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (i > 0) {
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // some overlap
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                m_map_[start >>> SHIFT_] = newStart - i;
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // move the non-overlapping indexes to their new positions
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start += i;
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    m_data_[newStart ++] = m_data_[start ++];
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else if (newStart < start) {
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // no overlap, just move the indexes to their new positions
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                m_map_[start >>> SHIFT_] = newStart;
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    m_data_[newStart ++] = m_data_[start ++];
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else { // no overlap && newStart==start
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                m_map_[start >>> SHIFT_] = start;
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                newStart += DATA_BLOCK_LENGTH;
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start = newStart;
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // now adjust the index (stage 1) table
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i = 0; i < m_indexLength_; ++ i) {
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_dataLength_ = newStart;
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Find the same data block
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param data array
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param dataLength
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param otherBlock
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param step
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int findSameDataBlock(int data[], int dataLength,
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                               int otherBlock, int step)
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // ensure that we do not even partially get past dataLength
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        dataLength -= DATA_BLOCK_LENGTH;
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int block = 0; block <= dataLength; block += step) {
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return block;
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Fold the normalization data for supplementary code points into
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a compact area on top of the BMP-part of the trie index,
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * with the lead surrogates indexing this compact area.
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Duplicate the index values for lead surrogates:
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * From inside the BMP area, where some may be overridden with folded values,
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to just after the BMP area, where they can be retrieved for
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * code point lookups.
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param manipulate fold implementation
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final void fold(DataManipulate manipulate)
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int index[] = m_index_;
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // copy the lead surrogate indexes into a temporary array
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         SURROGATE_BLOCK_COUNT_);
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // set all values for lead surrogate code *units* to leadUnitValue
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // so that by default runtime lookups will find no data for associated
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // supplementary code points, unless there is data for such code points
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // which will result in a non-zero folding value below that is set for
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the respective lead units
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the above saved the indexes for surrogate code *points*
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // fill the indexes with simplified code from utrie_setRange32()
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int block = 0;
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (m_leadUnitValue_ == m_initialValue_) {
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // leadUnitValue == initialValue, use all-initial-value block
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // block = 0; if block here left empty
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // create and fill the repeatBlock
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            block = allocDataBlock();
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (block < 0) {
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // data table overflow
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalStateException("Internal error: Out of memory space");
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // negative block number to indicate that it is a repeat block
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            block = -block;
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            m_index_[c] = block;
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Fold significant index values into the area just after the BMP
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // indexes.
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // In case the first lead surrogate has significant data,
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // its index block must be used first (in which case the folding is a
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // no-op).
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Later all folded index blocks are moved up one to insert the copied
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // lead surrogate indexes.
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int indexLength = BMP_INDEX_LENGTH_;
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // search for any index (stage 1) entries for supplementary code points
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int c = 0x10000; c < 0x110000;) {
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (index[c >> SHIFT_] != 0) {
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // there is data, treat the full block for a lead surrogate
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c &= ~0x3ff;
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // is there an identical index block?
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // get a folded value for [c..c+0x400[ and,
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // if different from the value for the lead surrogate code
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // point, set it for the lead surrogate code unit
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = manipulate.getFoldedValue(c,
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                      block + SURROGATE_BLOCK_COUNT_);
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (value != getValue(UTF16.getLeadSurrogate(c))) {
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (!setValue(UTF16.getLeadSurrogate(c), value)) {
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // data table overflow
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        throw new ArrayIndexOutOfBoundsException(
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                                 "Data table overflow");
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // if we did not find an identical index block...
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (block == indexLength) {
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // move the actual index (stage 1) entries from the
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // supplementary position to the new one
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        System.arraycopy(index, c >> SHIFT_, index, indexLength,
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                         SURROGATE_BLOCK_COUNT_);
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        indexLength += SURROGATE_BLOCK_COUNT_;
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c += 0x400;
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c += DATA_BLOCK_LENGTH;
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // index array overflow?
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // This is to guarantee that a folding offset is of the form
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If the index is too large, then n>=1024 and more than 10 bits are
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // necessary.
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // In fact, it can only ever become n==1024 with completely unfoldable
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // data and the additional block of duplicated values for lead
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // surrogates.
7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (indexLength >= MAX_INDEX_LENGTH_) {
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ArrayIndexOutOfBoundsException("Index table overflow");
7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // make space for the lead surrogate index block and insert it between
7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the BMP indexes and the folded ones
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        System.arraycopy(index, BMP_INDEX_LENGTH_, index,
7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         indexLength - BMP_INDEX_LENGTH_);
7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         SURROGATE_BLOCK_COUNT_);
7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        indexLength += SURROGATE_BLOCK_COUNT_;
7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        m_indexLength_ = indexLength;
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
774836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void fillBlock(int block, int start, int limit, int value,
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           boolean overwrite)
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        limit += block;
7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        block += start;
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (overwrite) {
7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (block < limit) {
7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                m_data_[block ++] = value;
7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (block < limit) {
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (m_data_[block] == m_initialValue_) {
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    m_data_[block] = value;
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++ block;
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
797