1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 ******************************************************************************
6 * Copyright (C) 1996-2011, International Business Machines Corporation and   *
7 * others. All Rights Reserved.                                               *
8 ******************************************************************************
9 */
10
11/**
12 * Store bits (Unicode character properties) in bit set vectors.
13 *
14 * This is a port of the C++ class UPropsVectors from ICU4C
15 *
16 * @author Shaopeng Jia
17 * @hide draft / provisional / internal are hidden on Android
18 */
19
20package android.icu.impl;
21
22import java.util.Arrays;
23import java.util.Comparator;
24
25/**
26 * Unicode Properties Vectors associated with code point ranges.
27 *
28 * Rows of primitive integers in a contiguous array store the range limits and
29 * the properties vectors.
30 *
31 * In each row, row[0] contains the start code point and row[1] contains the
32 * limit code point, which is the start of the next range.
33 *
34 * Initially, there is only one range [0..0x110000] with values 0.
35 *
36 * It would be possible to store only one range boundary per row, but
37 * self-contained rows allow to later sort them by contents.
38 * @hide Only a subset of ICU is exposed in Android
39 */
40public class PropsVectors {
41    private int v[];
42    private int columns; // number of columns, plus two for start
43    // and limit values
44    private int maxRows;
45    private int rows;
46    private int prevRow; // search optimization: remember last row seen
47    private boolean isCompacted;
48
49    // internal function to compare elements in v and target. Return true iff
50    // elements in v starting from index1 to index1 + length - 1
51    // are exactly the same as elements in target
52    // starting from index2 to index2 + length - 1
53    private boolean areElementsSame(int index1, int[] target, int index2,
54            int length) {
55        for (int i = 0; i < length; ++i) {
56            if (v[index1 + i] != target[index2 + i]) {
57                return false;
58            }
59        }
60        return true;
61    }
62
63    // internal function which given rangeStart, returns
64    // index where v[index]<=rangeStart<v[index+1].
65    // The returned index is a multiple of columns, and therefore
66    // points to the start of a row.
67    private int findRow(int rangeStart) {
68        int index = 0;
69
70        // check the vicinity of the last-seen row (start
71        // searching with an unrolled loop)
72
73        index = prevRow * columns;
74        if (rangeStart >= v[index]) {
75            if (rangeStart < v[index + 1]) {
76                // same row as last seen
77                return index;
78            } else {
79                index += columns;
80                if (rangeStart < v[index + 1]) {
81                    ++prevRow;
82                    return index;
83                } else {
84                    index += columns;
85                    if (rangeStart < v[index + 1]) {
86                        prevRow += 2;
87                        return index;
88                    } else if ((rangeStart - v[index + 1]) < 10) {
89                        // we are close, continue looping
90                        prevRow += 2;
91                        do {
92                            ++prevRow;
93                            index += columns;
94                        } while (rangeStart >= v[index + 1]);
95                        return index;
96                    }
97                }
98            }
99        } else if (rangeStart < v[1]) {
100            // the very first row
101            prevRow = 0;
102            return 0;
103        }
104
105        // do a binary search for the start of the range
106        int start = 0;
107        int mid = 0;
108        int limit = rows;
109        while (start < limit - 1) {
110            mid = (start + limit) / 2;
111            index = columns * mid;
112            if (rangeStart < v[index]) {
113                limit = mid;
114            } else if (rangeStart < v[index + 1]) {
115                prevRow = mid;
116                return index;
117            } else {
118                start = mid;
119            }
120        }
121
122        // must be found because all ranges together always cover
123        // all of Unicode
124        prevRow = start;
125        index = start * columns;
126        return index;
127    }
128
129    /*
130     * Special pseudo code points for storing the initialValue and the
131     * errorValue which are used to initialize a Trie or similar.
132     */
133    public final static int FIRST_SPECIAL_CP = 0x110000;
134    public final static int INITIAL_VALUE_CP = 0x110000;
135    public final static int ERROR_VALUE_CP = 0x110001;
136    public final static int MAX_CP = 0x110001;
137
138    public final static int INITIAL_ROWS = 1 << 12;
139    public final static int MEDIUM_ROWS = 1 << 16;
140    public final static int MAX_ROWS = MAX_CP + 1;
141
142    /*
143     * Constructor.
144     * @param numOfColumns Number of value integers (32-bit int) per row.
145     */
146    public PropsVectors(int numOfColumns) {
147        if (numOfColumns < 1) {
148            throw new IllegalArgumentException("numOfColumns need to be no "
149                    + "less than 1; but it is " + numOfColumns);
150        }
151        columns = numOfColumns + 2; // count range start and limit columns
152        v = new int[INITIAL_ROWS * columns];
153        maxRows = INITIAL_ROWS;
154        rows = 2 + (MAX_CP - FIRST_SPECIAL_CP);
155        prevRow = 0;
156        isCompacted = false;
157        v[0] = 0;
158        v[1] = 0x110000;
159        int index = columns;
160        for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) {
161            v[index] = cp;
162            v[index + 1] = cp + 1;
163            index += columns;
164        }
165    }
166
167    /*
168     * In rows for code points [start..end], select the column, reset the mask
169     * bits and set the value bits (ANDed with the mask).
170     *
171     * @throws IllegalArgumentException
172     *
173     * @throws IllegalStateException
174     *
175     * @throws IndexOutOfBoundsException
176     */
177    public void setValue(int start, int end, int column, int value, int mask) {
178        if (start < 0 || start > end || end > MAX_CP || column < 0
179                || column >= (columns - 2)) {
180            throw new IllegalArgumentException();
181        }
182        if (isCompacted) {
183            throw new IllegalStateException("Shouldn't be called after"
184                    + "compact()!");
185        }
186
187        int firstRow, lastRow;
188        int limit = end + 1;
189        boolean splitFirstRow, splitLastRow;
190        // skip range start and limit columns
191        column += 2;
192        value &= mask;
193
194        // find the rows whose ranges overlap with the input range
195        // find the first and last row, always successful
196        firstRow = findRow(start);
197        lastRow = findRow(end);
198
199        /*
200         * Rows need to be split if they partially overlap with the input range
201         * (only possible for the first and last rows) and if their value
202         * differs from the input value.
203         */
204        splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask));
205        splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask));
206
207        // split first/last rows if necessary
208        if (splitFirstRow || splitLastRow) {
209            int rowsToExpand = 0;
210            if (splitFirstRow) {
211                ++rowsToExpand;
212            }
213            if (splitLastRow) {
214                ++rowsToExpand;
215            }
216            int newMaxRows = 0;
217            if ((rows + rowsToExpand) > maxRows) {
218                if (maxRows < MEDIUM_ROWS) {
219                    newMaxRows = MEDIUM_ROWS;
220                } else if (maxRows < MAX_ROWS) {
221                    newMaxRows = MAX_ROWS;
222                } else {
223                    throw new IndexOutOfBoundsException(
224                            "MAX_ROWS exceeded! Increase it to a higher value" +
225                            "in the implementation");
226                }
227                int[] temp = new int[newMaxRows * columns];
228                System.arraycopy(v, 0, temp, 0, rows * columns);
229                v = temp;
230                maxRows = newMaxRows;
231            }
232
233            // count the number of row cells to move after the last row,
234            // and move them
235            int count = (rows * columns) - (lastRow + columns);
236            if (count > 0) {
237                System.arraycopy(v, lastRow + columns, v, lastRow
238                        + (1 + rowsToExpand) * columns, count);
239            }
240            rows += rowsToExpand;
241
242            // split the first row, and move the firstRow pointer
243            // to the second part
244            if (splitFirstRow) {
245                // copy all affected rows up one and move the lastRow pointer
246                count = lastRow - firstRow + columns;
247                System.arraycopy(v, firstRow, v, firstRow + columns, count);
248                lastRow += columns;
249
250                // split the range and move the firstRow pointer
251                v[firstRow + 1] = v[firstRow + columns] = start;
252                firstRow += columns;
253            }
254
255            // split the last row
256            if (splitLastRow) {
257                // copy the last row data
258                System.arraycopy(v, lastRow, v, lastRow + columns, columns);
259
260                // split the range and move the firstRow pointer
261                v[lastRow + 1] = v[lastRow + columns] = limit;
262            }
263        }
264
265        // set the "row last seen" to the last row for the range
266        prevRow = lastRow / columns;
267
268        // set the input value in all remaining rows
269        firstRow += column;
270        lastRow += column;
271        mask = ~mask;
272        for (;;) {
273            v[firstRow] = (v[firstRow] & mask) | value;
274            if (firstRow == lastRow) {
275                break;
276            }
277            firstRow += columns;
278        }
279    }
280
281    /*
282     * Always returns 0 if called after compact().
283     */
284    public int getValue(int c, int column) {
285        if (isCompacted || c < 0 || c > MAX_CP || column < 0
286                || column >= (columns - 2)) {
287            return 0;
288        }
289        int index = findRow(c);
290        return v[index + 2 + column];
291    }
292
293    /*
294     * Returns an array which contains value elements
295     * in row rowIndex.
296     *
297     * @throws IllegalStateException
298     * @throws IllegalArgumentException
299     */
300    public int[] getRow(int rowIndex) {
301        if (isCompacted) {
302            throw new IllegalStateException(
303                    "Illegal Invocation of the method after compact()");
304        }
305        if (rowIndex < 0 || rowIndex > rows) {
306            throw new IllegalArgumentException("rowIndex out of bound!");
307        }
308        int[] rowToReturn = new int[columns - 2];
309        System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0,
310                         columns - 2);
311        return rowToReturn;
312    }
313
314    /*
315     * Returns an int which is the start codepoint
316     * in row rowIndex.
317     *
318     * @throws IllegalStateException
319     *
320     * @throws IllegalArgumentException
321     */
322    public int getRowStart(int rowIndex) {
323        if (isCompacted) {
324            throw new IllegalStateException(
325                    "Illegal Invocation of the method after compact()");
326        }
327        if (rowIndex < 0 || rowIndex > rows) {
328            throw new IllegalArgumentException("rowIndex out of bound!");
329        }
330        return v[rowIndex * columns];
331    }
332
333    /*
334     * Returns an int which is the limit codepoint
335     * minus 1 in row rowIndex.
336     *
337     * @throws IllegalStateException
338     *
339     * @throws IllegalArgumentException
340     */
341    public int getRowEnd(int rowIndex) {
342        if (isCompacted) {
343            throw new IllegalStateException(
344                    "Illegal Invocation of the method after compact()");
345        }
346        if (rowIndex < 0 || rowIndex > rows) {
347            throw new IllegalArgumentException("rowIndex out of bound!");
348        }
349        return v[rowIndex * columns + 1] - 1;
350    }
351
352    /*
353     * Compact the vectors:
354     * - modify the memory
355     * - keep only unique vectors
356     * - store them contiguously from the beginning of the memory
357     * - for each (non-unique) row, call the respective function in
358     *   CompactHandler
359     *
360     * The handler's rowIndex is the index of the row in the compacted
361     * memory block. Therefore, it starts at 0 increases in increments of the
362     * columns value.
363     *
364     * In a first phase, only special values are delivered (each exactly once).
365     * Then CompactHandler::startRealValues() is called
366     * where rowIndex is the length of the compacted array.
367     * Then, in the second phase, the CompactHandler::setRowIndexForRange() is
368     * called for each row of real values.
369     */
370    public void compact(CompactHandler compactor) {
371        if (isCompacted) {
372            return;
373        }
374
375        // Set the flag now: Sorting and compacting destroys the builder
376        // data structure.
377        isCompacted = true;
378        int valueColumns = columns - 2; // not counting start & limit
379
380        // sort the properties vectors to find unique vector values
381        Integer[] indexArray = new Integer[rows];
382        for (int i = 0; i < rows; ++i) {
383            indexArray[i] = Integer.valueOf(columns * i);
384        }
385
386        Arrays.sort(indexArray, new Comparator<Integer>() {
387            @Override
388            public int compare(Integer o1, Integer o2) {
389                int indexOfRow1 = o1.intValue();
390                int indexOfRow2 = o2.intValue();
391                int count = columns; // includes start/limit columns
392
393                // start comparing after start/limit
394                // but wrap around to them
395                int index = 2;
396                do {
397                    if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) {
398                        return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1
399                                : 1;
400                    }
401                    if (++index == columns) {
402                        index = 0;
403                    }
404                } while (--count > 0);
405
406                return 0;
407            }
408        });
409
410        /*
411         * Find and set the special values. This has to do almost the same work
412         * as the compaction below, to find the indexes where the special-value
413         * rows will move.
414         */
415        int count = -valueColumns;
416        for (int i = 0; i < rows; ++i) {
417            int start = v[indexArray[i].intValue()];
418
419            // count a new values vector if it is different
420            // from the current one
421            if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v,
422                    indexArray[i-1].intValue() + 2, valueColumns)) {
423                count += valueColumns;
424            }
425
426            if (start == INITIAL_VALUE_CP) {
427                compactor.setRowIndexForInitialValue(count);
428            } else if (start == ERROR_VALUE_CP) {
429                compactor.setRowIndexForErrorValue(count);
430            }
431        }
432
433        // count is at the beginning of the last vector,
434        // add valueColumns to include that last vector
435        count += valueColumns;
436
437        // Call the handler once more to signal the start of
438        // delivering real values.
439        compactor.startRealValues(count);
440
441        /*
442         * Move vector contents up to a contiguous array with only unique
443         * vector values, and call the handler function for each vector.
444         *
445         * This destroys the Properties Vector structure and replaces it
446         * with an array of just vector values.
447         */
448        int[] temp = new int[count];
449        count = -valueColumns;
450        for (int i = 0; i < rows; ++i) {
451            int start = v[indexArray[i].intValue()];
452            int limit = v[indexArray[i].intValue() + 1];
453
454            // count a new values vector if it is different
455            // from the current one
456            if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2,
457                    temp, count, valueColumns)) {
458                count += valueColumns;
459                System.arraycopy(v, indexArray[i].intValue() + 2, temp, count,
460                        valueColumns);
461            }
462
463            if (start < FIRST_SPECIAL_CP) {
464                compactor.setRowIndexForRange(start, limit - 1, count);
465            }
466        }
467        v = temp;
468
469        // count is at the beginning of the last vector,
470        // add one to include that last vector
471        rows = count / valueColumns + 1;
472    }
473
474    /*
475     * Get the vectors array after calling compact().
476     *
477     * @throws IllegalStateException
478     */
479    public int[] getCompactedArray() {
480        if (!isCompacted) {
481            throw new IllegalStateException(
482                    "Illegal Invocation of the method before compact()");
483        }
484        return v;
485    }
486
487    /*
488     * Get the number of rows for the compacted array.
489     *
490     * @throws IllegalStateException
491     */
492    public int getCompactedRows() {
493        if (!isCompacted) {
494            throw new IllegalStateException(
495                    "Illegal Invocation of the method before compact()");
496        }
497        return rows;
498    }
499
500    /*
501     * Get the number of columns for the compacted array.
502     *
503     * @throws IllegalStateException
504     */
505    public int getCompactedColumns() {
506        if (!isCompacted) {
507            throw new IllegalStateException(
508                    "Illegal Invocation of the method before compact()");
509        }
510        return columns - 2;
511    }
512
513    /*
514     * Call compact(), create a IntTrie with indexes into the compacted
515     * vectors array.
516     */
517    public IntTrie compactToTrieWithRowIndexes() {
518        PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler();
519        compact(compactor);
520        return compactor.builder.serialize(new DefaultGetFoldedValue(
521                compactor.builder), new DefaultGetFoldingOffset());
522    }
523
524    // inner class implementation of Trie.DataManipulate
525    private static class DefaultGetFoldingOffset implements Trie.DataManipulate {
526        @Override
527        public int getFoldingOffset(int value) {
528            return value;
529        }
530    }
531
532    // inner class implementation of TrieBuilder.DataManipulate
533    private static class DefaultGetFoldedValue implements
534            TrieBuilder.DataManipulate {
535        private IntTrieBuilder builder;
536
537        public DefaultGetFoldedValue(IntTrieBuilder inBuilder) {
538            builder = inBuilder;
539        }
540
541        @Override
542        public int getFoldedValue(int start, int offset) {
543            int initialValue = builder.m_initialValue_;
544            int limit = start + 0x400;
545            while (start < limit) {
546                boolean[] inBlockZero = new boolean[1];
547                int value = builder.getValue(start, inBlockZero);
548                if (inBlockZero[0]) {
549                    start += TrieBuilder.DATA_BLOCK_LENGTH;
550                } else if (value != initialValue) {
551                    return offset;
552                } else {
553                    ++start;
554                }
555            }
556            return 0;
557        }
558    }
559
560    public static interface CompactHandler {
561        public void setRowIndexForRange(int start, int end, int rowIndex);
562        public void setRowIndexForInitialValue(int rowIndex);
563        public void setRowIndexForErrorValue(int rowIndex);
564        public void startRealValues(int rowIndex);
565    }
566}