1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4******************************************************************************
5* Copyright (C) 1996-2010, International Business Machines Corporation and   *
6* others. All Rights Reserved.                                               *
7******************************************************************************
8*/
9
10package com.ibm.icu.impl;
11
12import java.io.DataOutputStream;
13import java.io.IOException;
14import java.io.OutputStream;
15import java.util.Arrays;
16
17import com.ibm.icu.lang.UCharacter;
18import com.ibm.icu.text.UTF16;
19
20/**
21 * Builder class to manipulate and generate a trie.
22 * This is useful for ICU data in primitive types.
23 * Provides a compact way to store information that is indexed by Unicode
24 * values, such as character properties, types, keyboard values, etc. This is
25 * very useful when you have a block of Unicode data that contains significant
26 * values while the rest of the Unicode data is unused in the application or
27 * when you have a lot of redundance, such as where all 21,000 Han ideographs
28 * have the same value.  However, lookup is much faster than a hash table.
29 * A trie of any primitive data type serves two purposes:
30 * <UL type = round>
31 *     <LI>Fast access of the indexed values.
32 *     <LI>Smaller memory footprint.
33 * </UL>
34 * This is a direct port from the ICU4C version
35 * @author             Syn Wee Quek
36 */
37public class IntTrieBuilder extends TrieBuilder
38{
39    // public constructor ----------------------------------------------
40
41    /**
42     * Copy constructor
43     */
44    public IntTrieBuilder(IntTrieBuilder table)
45    {
46        super(table);
47        m_data_ = new int[m_dataCapacity_];
48        System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
49        m_initialValue_ = table.m_initialValue_;
50        m_leadUnitValue_ = table.m_leadUnitValue_;
51    }
52
53    /**
54     * Constructs a build table
55     * @param aliasdata data to be filled into table
56     * @param maxdatalength maximum data length allowed in table
57     * @param initialvalue inital data value
58     * @param latin1linear is latin 1 to be linear
59     */
60    public IntTrieBuilder(int aliasdata[], int maxdatalength,
61                          int initialvalue, int leadunitvalue,
62                          boolean latin1linear)
63    {
64        super();
65        if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
66                                                  && maxdatalength < 1024)) {
67            throw new IllegalArgumentException(
68                                               "Argument maxdatalength is too small");
69        }
70
71        if (aliasdata != null) {
72            m_data_ = aliasdata;
73        }
74        else {
75            m_data_ = new int[maxdatalength];
76        }
77
78        // preallocate and reset the first data block (block index 0)
79        int j = DATA_BLOCK_LENGTH;
80
81        if (latin1linear) {
82            // preallocate and reset the first block (number 0) and Latin-1
83            // (U+0000..U+00ff) after that made sure above that
84            // maxDataLength >= 1024
85            // set indexes to point to consecutive data blocks
86            int i = 0;
87            do {
88                // do this at least for trie->index[0] even if that block is
89                // only partly used for Latin-1
90                m_index_[i ++] = j;
91                j += DATA_BLOCK_LENGTH;
92            } while (i < (256 >> SHIFT_));
93        }
94
95        m_dataLength_ = j;
96        // reset the initially allocated blocks to the initial value
97        Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
98        m_initialValue_ = initialvalue;
99        m_leadUnitValue_ = leadunitvalue;
100        m_dataCapacity_ = maxdatalength;
101        m_isLatin1Linear_ = latin1linear;
102        m_isCompacted_ = false;
103    }
104
105    // public methods -------------------------------------------------------
106
107    /*public final void print()
108      {
109      int i = 0;
110      int oldvalue = m_index_[i];
111      int count = 0;
112      System.out.println("index length " + m_indexLength_
113      + " --------------------------");
114      while (i < m_indexLength_) {
115      if (m_index_[i] != oldvalue) {
116      System.out.println("index has " + count + " counts of "
117      + Integer.toHexString(oldvalue));
118      count = 0;
119      oldvalue = m_index_[i];
120      }
121      count ++;
122      i ++;
123      }
124      System.out.println("index has " + count + " counts of "
125      + Integer.toHexString(oldvalue));
126      i = 0;
127      oldvalue = m_data_[i];
128      count = 0;
129      System.out.println("data length " + m_dataLength_
130      + " --------------------------");
131      while (i < m_dataLength_) {
132      if (m_data_[i] != oldvalue) {
133      if ((oldvalue & 0xf1000000) == 0xf1000000) {
134      int temp = oldvalue & 0xffffff;
135      temp += 0x320;
136      oldvalue = 0xf1000000 | temp;
137      }
138      if ((oldvalue & 0xf2000000) == 0xf2000000) {
139      int temp = oldvalue & 0xffffff;
140      temp += 0x14a;
141      oldvalue = 0xf2000000 | temp;
142      }
143      System.out.println("data has " + count + " counts of "
144      + Integer.toHexString(oldvalue));
145      count = 0;
146      oldvalue = m_data_[i];
147      }
148      count ++;
149      i ++;
150      }
151      if ((oldvalue & 0xf1000000) == 0xf1000000) {
152      int temp = oldvalue & 0xffffff;
153      temp += 0x320;
154      oldvalue = 0xf1000000 | temp;
155      }
156      if ((oldvalue & 0xf2000000) == 0xf2000000) {
157      int temp = oldvalue & 0xffffff;
158      temp += 0x14a;
159      oldvalue = 0xf2000000 | temp;
160      }
161      System.out.println("data has " + count + " counts of "
162      + Integer.toHexString(oldvalue));
163      }
164    */
165    /**
166     * Gets a 32 bit data from the table data
167     * @param ch codepoint which data is to be retrieved
168     * @return the 32 bit data
169     */
170    public int getValue(int ch)
171    {
172        // valid, uncompacted trie and valid c?
173        if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
174            return 0;
175        }
176
177        int block = m_index_[ch >> SHIFT_];
178        return m_data_[Math.abs(block) + (ch & MASK_)];
179    }
180
181    /**
182     * Get a 32 bit data from the table data
183     * @param ch  code point for which data is to be retrieved.
184     * @param inBlockZero  Output parameter, inBlockZero[0] returns true if the
185     *                      char maps into block zero, otherwise false.
186     * @return the 32 bit data value.
187     */
188    public int getValue(int ch, boolean [] inBlockZero)
189    {
190        // valid, uncompacted trie and valid c?
191        if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
192            if (inBlockZero != null) {
193                inBlockZero[0] = true;
194            }
195            return 0;
196        }
197
198        int block = m_index_[ch >> SHIFT_];
199        if (inBlockZero != null) {
200            inBlockZero[0] = (block == 0);
201        }
202        return m_data_[Math.abs(block) + (ch & MASK_)];
203    }
204
205
206    /**
207     * Sets a 32 bit data in the table data
208     * @param ch codepoint which data is to be set
209     * @param value to set
210     * @return true if the set is successful, otherwise
211     *              if the table has been compacted return false
212     */
213    public boolean setValue(int ch, int value)
214    {
215        // valid, uncompacted trie and valid c?
216        if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
217            return false;
218        }
219
220        int block = getDataBlock(ch);
221        if (block < 0) {
222            return false;
223        }
224
225        m_data_[block + (ch & MASK_)] = value;
226        return true;
227    }
228
229    /**
230     * Serializes the build table with 32 bit data
231     * @param datamanipulate builder raw fold method implementation
232     * @param triedatamanipulate result trie fold method
233     * @return a new trie
234     */
235    public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
236                             Trie.DataManipulate triedatamanipulate)
237    {
238        if (datamanipulate == null) {
239            throw new IllegalArgumentException("Parameters can not be null");
240        }
241        // fold and compact if necessary, also checks that indexLength is
242        // within limits
243        if (!m_isCompacted_) {
244            // compact once without overlap to improve folding
245            compact(false);
246            // fold the supplementary part of the index array
247            fold(datamanipulate);
248            // compact again with overlap for minimum data array length
249            compact(true);
250            m_isCompacted_ = true;
251        }
252        // is dataLength within limits?
253        if (m_dataLength_ >= MAX_DATA_LENGTH_) {
254            throw new ArrayIndexOutOfBoundsException("Data length too small");
255        }
256
257        char index[] = new char[m_indexLength_];
258        int data[] = new int[m_dataLength_];
259        // write the index (stage 1) array and the 32-bit data (stage 2) array
260        // write 16-bit index values shifted right by INDEX_SHIFT_
261        for (int i = 0; i < m_indexLength_; i ++) {
262            index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
263        }
264        // write 32-bit data values
265        System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
266
267        int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
268        options |= OPTIONS_DATA_IS_32_BIT_;
269        if (m_isLatin1Linear_) {
270            options |= OPTIONS_LATIN1_IS_LINEAR_;
271        }
272        return new IntTrie(index, data, m_initialValue_, options,
273                           triedatamanipulate);
274    }
275
276
277    /**
278     * Serializes the build table to an output stream.
279     *
280     * Compacts the build-time trie after all values are set, and then
281     * writes the serialized form onto an output stream.
282     *
283     * After this, this build-time Trie can only be serialized again and/or closed;
284     * no further values can be added.
285     *
286     * This function is the rough equivalent of utrie_seriaize() in ICU4C.
287     *
288     * @param os the output stream to which the seriaized trie will be written.
289     *         If nul, the function still returns the size of the serialized Trie.
290     * @param reduceTo16Bits If true, reduce the data size to 16 bits.  The resulting
291     *         serialized form can then be used to create a CharTrie.
292     * @param datamanipulate builder raw fold method implementation
293     * @return the number of bytes written to the output stream.
294     */
295     public int serialize(OutputStream os, boolean reduceTo16Bits,
296            TrieBuilder.DataManipulate datamanipulate)  throws IOException {
297         if (datamanipulate == null) {
298             throw new IllegalArgumentException("Parameters can not be null");
299         }
300
301         // fold and compact if necessary, also checks that indexLength is
302         // within limits
303         if (!m_isCompacted_) {
304             // compact once without overlap to improve folding
305             compact(false);
306             // fold the supplementary part of the index array
307             fold(datamanipulate);
308             // compact again with overlap for minimum data array length
309             compact(true);
310             m_isCompacted_ = true;
311         }
312
313         // is dataLength within limits?
314         int length;
315         if (reduceTo16Bits) {
316             length = m_dataLength_ + m_indexLength_;
317         } else {
318             length = m_dataLength_;
319         }
320         if (length >= MAX_DATA_LENGTH_) {
321             throw new ArrayIndexOutOfBoundsException("Data length too small");
322         }
323
324         //  struct UTrieHeader {
325         //      int32_t   signature;
326         //      int32_t   options  (a bit field)
327         //      int32_t   indexLength
328         //      int32_t   dataLength
329         length = Trie.HEADER_LENGTH_ + 2*m_indexLength_;
330         if(reduceTo16Bits) {
331             length+=2*m_dataLength_;
332         } else {
333             length+=4*m_dataLength_;
334         }
335
336         if (os == null) {
337             // No output stream.  Just return the length of the serialized Trie, in bytes.
338             return length;
339         }
340
341         DataOutputStream dos = new DataOutputStream(os);
342         dos.writeInt(Trie.HEADER_SIGNATURE_);
343
344         int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_);
345         if(!reduceTo16Bits) {
346             options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
347         }
348         if(m_isLatin1Linear_) {
349             options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
350         }
351         dos.writeInt(options);
352
353         dos.writeInt(m_indexLength_);
354         dos.writeInt(m_dataLength_);
355
356         /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
357         if(reduceTo16Bits) {
358             /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
359             for (int i=0; i<m_indexLength_; i++) {
360                 int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
361                 dos.writeChar(v);
362             }
363
364             /* write 16-bit data values */
365             for(int i=0; i<m_dataLength_; i++) {
366                 int v = m_data_[i] & 0x0000ffff;
367                 dos.writeChar(v);
368             }
369         } else {
370             /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
371             for (int i=0; i<m_indexLength_; i++) {
372                 int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
373                 dos.writeChar(v);
374             }
375
376             /* write 32-bit data values */
377             for(int i=0; i<m_dataLength_; i++) {
378                 dos.writeInt(m_data_[i]);
379             }
380        }
381
382         return length;
383
384    }
385
386
387    /**
388     * Set a value in a range of code points [start..limit].
389     * All code points c with start &lt;= c &lt; limit will get the value if
390     * overwrite is true or if the old value is 0.
391     * @param start the first code point to get the value
392     * @param limit one past the last code point to get the value
393     * @param value the value
394     * @param overwrite flag for whether old non-initial values are to be
395     *        overwritten
396     * @return false if a failure occurred (illegal argument or data array
397     *               overrun)
398     */
399    public boolean setRange(int start, int limit, int value,
400                            boolean overwrite)
401    {
402        // repeat value in [start..limit[
403        // mark index values for repeat-data blocks by setting bit 31 of the
404        // index values fill around existing values if any, if(overwrite)
405
406        // valid, uncompacted trie and valid indexes?
407        if (m_isCompacted_ || start < UCharacter.MIN_VALUE
408            || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
409            || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
410            return false;
411        }
412
413        if (start == limit) {
414            return true; // nothing to do
415        }
416
417        if ((start & MASK_) != 0) {
418            // set partial block at [start..following block boundary[
419            int block = getDataBlock(start);
420            if (block < 0) {
421                return false;
422            }
423
424            int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
425            if (nextStart <= limit) {
426                fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
427                          value, overwrite);
428                start = nextStart;
429            }
430            else {
431                fillBlock(block, start & MASK_, limit & MASK_,
432                          value, overwrite);
433                return true;
434            }
435        }
436
437        // number of positions in the last, partial block
438        int rest = limit & MASK_;
439
440        // round down limit to a block boundary
441        limit &= ~MASK_;
442
443        // iterate over all-value blocks
444        int repeatBlock = 0;
445        if (value == m_initialValue_) {
446            // repeatBlock = 0; assigned above
447        }
448        else {
449            repeatBlock = -1;
450        }
451        while (start < limit) {
452            // get index value
453            int block = m_index_[start >> SHIFT_];
454            if (block > 0) {
455                // already allocated, fill in value
456                fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
457            }
458            else if (m_data_[-block] != value && (block == 0 || overwrite)) {
459                // set the repeatBlock instead of the current block 0 or range
460                // block
461                if (repeatBlock >= 0) {
462                    m_index_[start >> SHIFT_] = -repeatBlock;
463                }
464                else {
465                    // create and set and fill the repeatBlock
466                    repeatBlock = getDataBlock(start);
467                    if (repeatBlock < 0) {
468                        return false;
469                    }
470
471                    // set the negative block number to indicate that it is a
472                    // repeat block
473                    m_index_[start >> SHIFT_] = -repeatBlock;
474                    fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
475                }
476            }
477
478            start += DATA_BLOCK_LENGTH;
479        }
480
481        if (rest > 0) {
482            // set partial block at [last block boundary..limit[
483            int block = getDataBlock(start);
484            if (block < 0) {
485                return false;
486            }
487            fillBlock(block, 0, rest, value, overwrite);
488        }
489
490        return true;
491    }
492
493    // protected data member ------------------------------------------------
494
495    protected int m_data_[];
496    protected int m_initialValue_;
497
498    //  private data member ------------------------------------------------
499
500    private int m_leadUnitValue_;
501
502    // private methods ------------------------------------------------------
503
504    private int allocDataBlock()
505    {
506        int newBlock = m_dataLength_;
507        int newTop = newBlock + DATA_BLOCK_LENGTH;
508        if (newTop > m_dataCapacity_) {
509            // out of memory in the data array
510            return -1;
511        }
512        m_dataLength_ = newTop;
513        return newBlock;
514    }
515
516    /**
517     * No error checking for illegal arguments.
518     * @param ch codepoint to look for
519     * @return -1 if no new data block available (out of memory in data array)
520     */
521    private int getDataBlock(int ch)
522    {
523        ch >>= SHIFT_;
524        int indexValue = m_index_[ch];
525        if (indexValue > 0) {
526            return indexValue;
527        }
528
529        // allocate a new data block
530        int newBlock = allocDataBlock();
531        if (newBlock < 0) {
532            // out of memory in the data array
533            return -1;
534        }
535        m_index_[ch] = newBlock;
536
537        // copy-on-write for a block from a setRange()
538        System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
539                         DATA_BLOCK_LENGTH << 2);
540        return newBlock;
541    }
542
543    /**
544     * Compact a folded build-time trie.
545     * The compaction
546     * - removes blocks that are identical with earlier ones
547     * - overlaps adjacent blocks as much as possible (if overlap == true)
548     * - moves blocks in steps of the data granularity
549     * - moves and overlaps blocks that overlap with multiple values in the overlap region
550     *
551     * It does not
552     * - try to move and overlap blocks that are not already adjacent
553     * @param overlap flag
554     */
555    private void compact(boolean overlap)
556    {
557        if (m_isCompacted_) {
558            return; // nothing left to do
559        }
560
561        // compaction
562        // initialize the index map with "block is used/unused" flags
563        findUnusedBlocks();
564
565        // if Latin-1 is preallocated and linear, then do not compact Latin-1
566        // data
567        int overlapStart = DATA_BLOCK_LENGTH;
568        if (m_isLatin1Linear_ && SHIFT_ <= 8) {
569            overlapStart += 256;
570        }
571
572        int newStart = DATA_BLOCK_LENGTH;
573        int i;
574        for (int start = newStart; start < m_dataLength_;) {
575            // start: index of first entry of current block
576            // newStart: index where the current block is to be moved
577            //           (right after current end of already-compacted data)
578            // skip blocks that are not used
579            if (m_map_[start >>> SHIFT_] < 0) {
580                // advance start to the next block
581                start += DATA_BLOCK_LENGTH;
582                // leave newStart with the previous block!
583                continue;
584            }
585            // search for an identical block
586            if (start >= overlapStart) {
587                i = findSameDataBlock(m_data_, newStart, start,
588                                          overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
589                if (i >= 0) {
590                    // found an identical block, set the other block's index
591                    // value for the current block
592                    m_map_[start >>> SHIFT_] = i;
593                    // advance start to the next block
594                    start += DATA_BLOCK_LENGTH;
595                    // leave newStart with the previous block!
596                    continue;
597                }
598            }
599            // see if the beginning of this block can be overlapped with the
600            // end of the previous block
601            if(overlap && start>=overlapStart) {
602                /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
603                for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
604                    i>0 && !equal_int(m_data_, newStart-i, start, i);
605                    i-=DATA_GRANULARITY_) {}
606            } else {
607                i=0;
608            }
609            if (i > 0) {
610                // some overlap
611                m_map_[start >>> SHIFT_] = newStart - i;
612                // move the non-overlapping indexes to their new positions
613                start += i;
614                for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
615                    m_data_[newStart ++] = m_data_[start ++];
616                }
617            }
618            else if (newStart < start) {
619                // no overlap, just move the indexes to their new positions
620                m_map_[start >>> SHIFT_] = newStart;
621                for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
622                    m_data_[newStart ++] = m_data_[start ++];
623                }
624            }
625            else { // no overlap && newStart==start
626                m_map_[start >>> SHIFT_] = start;
627                newStart += DATA_BLOCK_LENGTH;
628                start = newStart;
629            }
630        }
631        // now adjust the index (stage 1) table
632        for (i = 0; i < m_indexLength_; ++ i) {
633            m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
634        }
635        m_dataLength_ = newStart;
636    }
637
638    /**
639     * Find the same data block
640     * @param data array
641     * @param dataLength
642     * @param otherBlock
643     * @param step
644     */
645    private static final int findSameDataBlock(int data[], int dataLength,
646                                               int otherBlock, int step)
647    {
648        // ensure that we do not even partially get past dataLength
649        dataLength -= DATA_BLOCK_LENGTH;
650
651        for (int block = 0; block <= dataLength; block += step) {
652            if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
653                return block;
654            }
655        }
656        return -1;
657    }
658
659    /**
660     * Fold the normalization data for supplementary code points into
661     * a compact area on top of the BMP-part of the trie index,
662     * with the lead surrogates indexing this compact area.
663     *
664     * Duplicate the index values for lead surrogates:
665     * From inside the BMP area, where some may be overridden with folded values,
666     * to just after the BMP area, where they can be retrieved for
667     * code point lookups.
668     * @param manipulate fold implementation
669     */
670    private final void fold(DataManipulate manipulate)
671    {
672        int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
673        int index[] = m_index_;
674        // copy the lead surrogate indexes into a temporary array
675        System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
676                         SURROGATE_BLOCK_COUNT_);
677
678        // set all values for lead surrogate code *units* to leadUnitValue
679        // so that by default runtime lookups will find no data for associated
680        // supplementary code points, unless there is data for such code points
681        // which will result in a non-zero folding value below that is set for
682        // the respective lead units
683        // the above saved the indexes for surrogate code *points*
684        // fill the indexes with simplified code from utrie_setRange32()
685        int block = 0;
686        if (m_leadUnitValue_ == m_initialValue_) {
687            // leadUnitValue == initialValue, use all-initial-value block
688            // block = 0; if block here left empty
689        }
690        else {
691            // create and fill the repeatBlock
692            block = allocDataBlock();
693            if (block < 0) {
694                // data table overflow
695                throw new IllegalStateException("Internal error: Out of memory space");
696            }
697            fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
698            // negative block number to indicate that it is a repeat block
699            block = -block;
700        }
701        for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
702            m_index_[c] = block;
703        }
704
705        // Fold significant index values into the area just after the BMP
706        // indexes.
707        // In case the first lead surrogate has significant data,
708        // its index block must be used first (in which case the folding is a
709        // no-op).
710        // Later all folded index blocks are moved up one to insert the copied
711        // lead surrogate indexes.
712        int indexLength = BMP_INDEX_LENGTH_;
713        // search for any index (stage 1) entries for supplementary code points
714        for (int c = 0x10000; c < 0x110000;) {
715            if (index[c >> SHIFT_] != 0) {
716                // there is data, treat the full block for a lead surrogate
717                c &= ~0x3ff;
718                // is there an identical index block?
719                block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
720
721                // get a folded value for [c..c+0x400[ and,
722                // if different from the value for the lead surrogate code
723                // point, set it for the lead surrogate code unit
724
725                int value = manipulate.getFoldedValue(c,
726                                                      block + SURROGATE_BLOCK_COUNT_);
727                if (value != getValue(UTF16.getLeadSurrogate(c))) {
728                    if (!setValue(UTF16.getLeadSurrogate(c), value)) {
729                        // data table overflow
730                        throw new ArrayIndexOutOfBoundsException(
731                                                                 "Data table overflow");
732                    }
733                    // if we did not find an identical index block...
734                    if (block == indexLength) {
735                        // move the actual index (stage 1) entries from the
736                        // supplementary position to the new one
737                        System.arraycopy(index, c >> SHIFT_, index, indexLength,
738                                         SURROGATE_BLOCK_COUNT_);
739                        indexLength += SURROGATE_BLOCK_COUNT_;
740                    }
741                }
742                c += 0x400;
743            }
744            else {
745                c += DATA_BLOCK_LENGTH;
746            }
747        }
748
749        // index array overflow?
750        // This is to guarantee that a folding offset is of the form
751        // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
752        // If the index is too large, then n>=1024 and more than 10 bits are
753        // necessary.
754        // In fact, it can only ever become n==1024 with completely unfoldable
755        // data and the additional block of duplicated values for lead
756        // surrogates.
757        if (indexLength >= MAX_INDEX_LENGTH_) {
758            throw new ArrayIndexOutOfBoundsException("Index table overflow");
759        }
760        // make space for the lead surrogate index block and insert it between
761        // the BMP indexes and the folded ones
762        System.arraycopy(index, BMP_INDEX_LENGTH_, index,
763                         BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
764                         indexLength - BMP_INDEX_LENGTH_);
765        System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
766                         SURROGATE_BLOCK_COUNT_);
767        indexLength += SURROGATE_BLOCK_COUNT_;
768        m_indexLength_ = indexLength;
769    }
770
771    /**
772     * @internal
773     */
774    private void fillBlock(int block, int start, int limit, int value,
775                           boolean overwrite)
776    {
777        limit += block;
778        block += start;
779        if (overwrite) {
780            while (block < limit) {
781                m_data_[block ++] = value;
782            }
783        }
784        else {
785            while (block < limit) {
786                if (m_data_[block] == m_initialValue_) {
787                    m_data_[block] = value;
788                }
789                ++ block;
790            }
791        }
792    }
793}
794
795