1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 ******************************************************************************
5 * Copyright (C) 1996-2011, International Business Machines Corporation and   *
6 * others. All Rights Reserved.                                               *
7 ******************************************************************************
8 */
9
10/**
11 * Store bits (Unicode character properties) in bit set vectors.
12 *
13 * This is a port of the C++ class UPropsVectors from ICU4C
14 *
15 * @author Shaopeng Jia
16 * @internal
17 */
18
19package com.ibm.icu.impl;
20
21import java.util.Arrays;
22import java.util.Comparator;
23
24/**
25 * Unicode Properties Vectors associated with code point ranges.
26 *
27 * Rows of primitive integers in a contiguous array store the range limits and
28 * the properties vectors.
29 *
30 * In each row, row[0] contains the start code point and row[1] contains the
31 * limit code point, which is the start of the next range.
32 *
33 * Initially, there is only one range [0..0x110000] with values 0.
34 *
35 * It would be possible to store only one range boundary per row, but
36 * self-contained rows allow to later sort them by contents.
37 */
38public class PropsVectors {
39    private int v[];
40    private int columns; // number of columns, plus two for start
41    // and limit values
42    private int maxRows;
43    private int rows;
44    private int prevRow; // search optimization: remember last row seen
45    private boolean isCompacted;
46
47    // internal function to compare elements in v and target. Return true iff
48    // elements in v starting from index1 to index1 + length - 1
49    // are exactly the same as elements in target
50    // starting from index2 to index2 + length - 1
51    private boolean areElementsSame(int index1, int[] target, int index2,
52            int length) {
53        for (int i = 0; i < length; ++i) {
54            if (v[index1 + i] != target[index2 + i]) {
55                return false;
56            }
57        }
58        return true;
59    }
60
61    // internal function which given rangeStart, returns
62    // index where v[index]<=rangeStart<v[index+1].
63    // The returned index is a multiple of columns, and therefore
64    // points to the start of a row.
65    private int findRow(int rangeStart) {
66        int index = 0;
67
68        // check the vicinity of the last-seen row (start
69        // searching with an unrolled loop)
70
71        index = prevRow * columns;
72        if (rangeStart >= v[index]) {
73            if (rangeStart < v[index + 1]) {
74                // same row as last seen
75                return index;
76            } else {
77                index += columns;
78                if (rangeStart < v[index + 1]) {
79                    ++prevRow;
80                    return index;
81                } else {
82                    index += columns;
83                    if (rangeStart < v[index + 1]) {
84                        prevRow += 2;
85                        return index;
86                    } else if ((rangeStart - v[index + 1]) < 10) {
87                        // we are close, continue looping
88                        prevRow += 2;
89                        do {
90                            ++prevRow;
91                            index += columns;
92                        } while (rangeStart >= v[index + 1]);
93                        return index;
94                    }
95                }
96            }
97        } else if (rangeStart < v[1]) {
98            // the very first row
99            prevRow = 0;
100            return 0;
101        }
102
103        // do a binary search for the start of the range
104        int start = 0;
105        int mid = 0;
106        int limit = rows;
107        while (start < limit - 1) {
108            mid = (start + limit) / 2;
109            index = columns * mid;
110            if (rangeStart < v[index]) {
111                limit = mid;
112            } else if (rangeStart < v[index + 1]) {
113                prevRow = mid;
114                return index;
115            } else {
116                start = mid;
117            }
118        }
119
120        // must be found because all ranges together always cover
121        // all of Unicode
122        prevRow = start;
123        index = start * columns;
124        return index;
125    }
126
127    /*
128     * Special pseudo code points for storing the initialValue and the
129     * errorValue which are used to initialize a Trie or similar.
130     */
131    public final static int FIRST_SPECIAL_CP = 0x110000;
132    public final static int INITIAL_VALUE_CP = 0x110000;
133    public final static int ERROR_VALUE_CP = 0x110001;
134    public final static int MAX_CP = 0x110001;
135
136    public final static int INITIAL_ROWS = 1 << 12;
137    public final static int MEDIUM_ROWS = 1 << 16;
138    public final static int MAX_ROWS = MAX_CP + 1;
139
140    /*
141     * Constructor.
142     * @param numOfColumns Number of value integers (32-bit int) per row.
143     */
144    public PropsVectors(int numOfColumns) {
145        if (numOfColumns < 1) {
146            throw new IllegalArgumentException("numOfColumns need to be no "
147                    + "less than 1; but it is " + numOfColumns);
148        }
149        columns = numOfColumns + 2; // count range start and limit columns
150        v = new int[INITIAL_ROWS * columns];
151        maxRows = INITIAL_ROWS;
152        rows = 2 + (MAX_CP - FIRST_SPECIAL_CP);
153        prevRow = 0;
154        isCompacted = false;
155        v[0] = 0;
156        v[1] = 0x110000;
157        int index = columns;
158        for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) {
159            v[index] = cp;
160            v[index + 1] = cp + 1;
161            index += columns;
162        }
163    }
164
165    /*
166     * In rows for code points [start..end], select the column, reset the mask
167     * bits and set the value bits (ANDed with the mask).
168     *
169     * @throws IllegalArgumentException
170     *
171     * @throws IllegalStateException
172     *
173     * @throws IndexOutOfBoundsException
174     */
175    public void setValue(int start, int end, int column, int value, int mask) {
176        if (start < 0 || start > end || end > MAX_CP || column < 0
177                || column >= (columns - 2)) {
178            throw new IllegalArgumentException();
179        }
180        if (isCompacted) {
181            throw new IllegalStateException("Shouldn't be called after"
182                    + "compact()!");
183        }
184
185        int firstRow, lastRow;
186        int limit = end + 1;
187        boolean splitFirstRow, splitLastRow;
188        // skip range start and limit columns
189        column += 2;
190        value &= mask;
191
192        // find the rows whose ranges overlap with the input range
193        // find the first and last row, always successful
194        firstRow = findRow(start);
195        lastRow = findRow(end);
196
197        /*
198         * Rows need to be split if they partially overlap with the input range
199         * (only possible for the first and last rows) and if their value
200         * differs from the input value.
201         */
202        splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask));
203        splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask));
204
205        // split first/last rows if necessary
206        if (splitFirstRow || splitLastRow) {
207            int rowsToExpand = 0;
208            if (splitFirstRow) {
209                ++rowsToExpand;
210            }
211            if (splitLastRow) {
212                ++rowsToExpand;
213            }
214            int newMaxRows = 0;
215            if ((rows + rowsToExpand) > maxRows) {
216                if (maxRows < MEDIUM_ROWS) {
217                    newMaxRows = MEDIUM_ROWS;
218                } else if (maxRows < MAX_ROWS) {
219                    newMaxRows = MAX_ROWS;
220                } else {
221                    throw new IndexOutOfBoundsException(
222                            "MAX_ROWS exceeded! Increase it to a higher value" +
223                            "in the implementation");
224                }
225                int[] temp = new int[newMaxRows * columns];
226                System.arraycopy(v, 0, temp, 0, rows * columns);
227                v = temp;
228                maxRows = newMaxRows;
229            }
230
231            // count the number of row cells to move after the last row,
232            // and move them
233            int count = (rows * columns) - (lastRow + columns);
234            if (count > 0) {
235                System.arraycopy(v, lastRow + columns, v, lastRow
236                        + (1 + rowsToExpand) * columns, count);
237            }
238            rows += rowsToExpand;
239
240            // split the first row, and move the firstRow pointer
241            // to the second part
242            if (splitFirstRow) {
243                // copy all affected rows up one and move the lastRow pointer
244                count = lastRow - firstRow + columns;
245                System.arraycopy(v, firstRow, v, firstRow + columns, count);
246                lastRow += columns;
247
248                // split the range and move the firstRow pointer
249                v[firstRow + 1] = v[firstRow + columns] = start;
250                firstRow += columns;
251            }
252
253            // split the last row
254            if (splitLastRow) {
255                // copy the last row data
256                System.arraycopy(v, lastRow, v, lastRow + columns, columns);
257
258                // split the range and move the firstRow pointer
259                v[lastRow + 1] = v[lastRow + columns] = limit;
260            }
261        }
262
263        // set the "row last seen" to the last row for the range
264        prevRow = lastRow / columns;
265
266        // set the input value in all remaining rows
267        firstRow += column;
268        lastRow += column;
269        mask = ~mask;
270        for (;;) {
271            v[firstRow] = (v[firstRow] & mask) | value;
272            if (firstRow == lastRow) {
273                break;
274            }
275            firstRow += columns;
276        }
277    }
278
279    /*
280     * Always returns 0 if called after compact().
281     */
282    public int getValue(int c, int column) {
283        if (isCompacted || c < 0 || c > MAX_CP || column < 0
284                || column >= (columns - 2)) {
285            return 0;
286        }
287        int index = findRow(c);
288        return v[index + 2 + column];
289    }
290
291    /*
292     * Returns an array which contains value elements
293     * in row rowIndex.
294     *
295     * @throws IllegalStateException
296     * @throws IllegalArgumentException
297     */
298    public int[] getRow(int rowIndex) {
299        if (isCompacted) {
300            throw new IllegalStateException(
301                    "Illegal Invocation of the method after compact()");
302        }
303        if (rowIndex < 0 || rowIndex > rows) {
304            throw new IllegalArgumentException("rowIndex out of bound!");
305        }
306        int[] rowToReturn = new int[columns - 2];
307        System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0,
308                         columns - 2);
309        return rowToReturn;
310    }
311
312    /*
313     * Returns an int which is the start codepoint
314     * in row rowIndex.
315     *
316     * @throws IllegalStateException
317     *
318     * @throws IllegalArgumentException
319     */
320    public int getRowStart(int rowIndex) {
321        if (isCompacted) {
322            throw new IllegalStateException(
323                    "Illegal Invocation of the method after compact()");
324        }
325        if (rowIndex < 0 || rowIndex > rows) {
326            throw new IllegalArgumentException("rowIndex out of bound!");
327        }
328        return v[rowIndex * columns];
329    }
330
331    /*
332     * Returns an int which is the limit codepoint
333     * minus 1 in row rowIndex.
334     *
335     * @throws IllegalStateException
336     *
337     * @throws IllegalArgumentException
338     */
339    public int getRowEnd(int rowIndex) {
340        if (isCompacted) {
341            throw new IllegalStateException(
342                    "Illegal Invocation of the method after compact()");
343        }
344        if (rowIndex < 0 || rowIndex > rows) {
345            throw new IllegalArgumentException("rowIndex out of bound!");
346        }
347        return v[rowIndex * columns + 1] - 1;
348    }
349
350    /*
351     * Compact the vectors:
352     * - modify the memory
353     * - keep only unique vectors
354     * - store them contiguously from the beginning of the memory
355     * - for each (non-unique) row, call the respective function in
356     *   CompactHandler
357     *
358     * The handler's rowIndex is the index of the row in the compacted
359     * memory block. Therefore, it starts at 0 increases in increments of the
360     * columns value.
361     *
362     * In a first phase, only special values are delivered (each exactly once).
363     * Then CompactHandler::startRealValues() is called
364     * where rowIndex is the length of the compacted array.
365     * Then, in the second phase, the CompactHandler::setRowIndexForRange() is
366     * called for each row of real values.
367     */
368    public void compact(CompactHandler compactor) {
369        if (isCompacted) {
370            return;
371        }
372
373        // Set the flag now: Sorting and compacting destroys the builder
374        // data structure.
375        isCompacted = true;
376        int valueColumns = columns - 2; // not counting start & limit
377
378        // sort the properties vectors to find unique vector values
379        Integer[] indexArray = new Integer[rows];
380        for (int i = 0; i < rows; ++i) {
381            indexArray[i] = Integer.valueOf(columns * i);
382        }
383
384        Arrays.sort(indexArray, new Comparator<Integer>() {
385            @Override
386            public int compare(Integer o1, Integer o2) {
387                int indexOfRow1 = o1.intValue();
388                int indexOfRow2 = o2.intValue();
389                int count = columns; // includes start/limit columns
390
391                // start comparing after start/limit
392                // but wrap around to them
393                int index = 2;
394                do {
395                    if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) {
396                        return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1
397                                : 1;
398                    }
399                    if (++index == columns) {
400                        index = 0;
401                    }
402                } while (--count > 0);
403
404                return 0;
405            }
406        });
407
408        /*
409         * Find and set the special values. This has to do almost the same work
410         * as the compaction below, to find the indexes where the special-value
411         * rows will move.
412         */
413        int count = -valueColumns;
414        for (int i = 0; i < rows; ++i) {
415            int start = v[indexArray[i].intValue()];
416
417            // count a new values vector if it is different
418            // from the current one
419            if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v,
420                    indexArray[i-1].intValue() + 2, valueColumns)) {
421                count += valueColumns;
422            }
423
424            if (start == INITIAL_VALUE_CP) {
425                compactor.setRowIndexForInitialValue(count);
426            } else if (start == ERROR_VALUE_CP) {
427                compactor.setRowIndexForErrorValue(count);
428            }
429        }
430
431        // count is at the beginning of the last vector,
432        // add valueColumns to include that last vector
433        count += valueColumns;
434
435        // Call the handler once more to signal the start of
436        // delivering real values.
437        compactor.startRealValues(count);
438
439        /*
440         * Move vector contents up to a contiguous array with only unique
441         * vector values, and call the handler function for each vector.
442         *
443         * This destroys the Properties Vector structure and replaces it
444         * with an array of just vector values.
445         */
446        int[] temp = new int[count];
447        count = -valueColumns;
448        for (int i = 0; i < rows; ++i) {
449            int start = v[indexArray[i].intValue()];
450            int limit = v[indexArray[i].intValue() + 1];
451
452            // count a new values vector if it is different
453            // from the current one
454            if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2,
455                    temp, count, valueColumns)) {
456                count += valueColumns;
457                System.arraycopy(v, indexArray[i].intValue() + 2, temp, count,
458                        valueColumns);
459            }
460
461            if (start < FIRST_SPECIAL_CP) {
462                compactor.setRowIndexForRange(start, limit - 1, count);
463            }
464        }
465        v = temp;
466
467        // count is at the beginning of the last vector,
468        // add one to include that last vector
469        rows = count / valueColumns + 1;
470    }
471
472    /*
473     * Get the vectors array after calling compact().
474     *
475     * @throws IllegalStateException
476     */
477    public int[] getCompactedArray() {
478        if (!isCompacted) {
479            throw new IllegalStateException(
480                    "Illegal Invocation of the method before compact()");
481        }
482        return v;
483    }
484
485    /*
486     * Get the number of rows for the compacted array.
487     *
488     * @throws IllegalStateException
489     */
490    public int getCompactedRows() {
491        if (!isCompacted) {
492            throw new IllegalStateException(
493                    "Illegal Invocation of the method before compact()");
494        }
495        return rows;
496    }
497
498    /*
499     * Get the number of columns for the compacted array.
500     *
501     * @throws IllegalStateException
502     */
503    public int getCompactedColumns() {
504        if (!isCompacted) {
505            throw new IllegalStateException(
506                    "Illegal Invocation of the method before compact()");
507        }
508        return columns - 2;
509    }
510
511    /*
512     * Call compact(), create a IntTrie with indexes into the compacted
513     * vectors array.
514     */
515    public IntTrie compactToTrieWithRowIndexes() {
516        PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler();
517        compact(compactor);
518        return compactor.builder.serialize(new DefaultGetFoldedValue(
519                compactor.builder), new DefaultGetFoldingOffset());
520    }
521
522    // inner class implementation of Trie.DataManipulate
523    private static class DefaultGetFoldingOffset implements Trie.DataManipulate {
524        @Override
525        public int getFoldingOffset(int value) {
526            return value;
527        }
528    }
529
530    // inner class implementation of TrieBuilder.DataManipulate
531    private static class DefaultGetFoldedValue implements
532            TrieBuilder.DataManipulate {
533        private IntTrieBuilder builder;
534
535        public DefaultGetFoldedValue(IntTrieBuilder inBuilder) {
536            builder = inBuilder;
537        }
538
539        @Override
540        public int getFoldedValue(int start, int offset) {
541            int initialValue = builder.m_initialValue_;
542            int limit = start + 0x400;
543            while (start < limit) {
544                boolean[] inBlockZero = new boolean[1];
545                int value = builder.getValue(start, inBlockZero);
546                if (inBlockZero[0]) {
547                    start += TrieBuilder.DATA_BLOCK_LENGTH;
548                } else if (value != initialValue) {
549                    return offset;
550                } else {
551                    ++start;
552                }
553            }
554            return 0;
555        }
556    }
557
558    public static interface CompactHandler {
559        public void setRowIndexForRange(int start, int end, int rowIndex);
560        public void setRowIndexForInitialValue(int rowIndex);
561        public void setRowIndexForErrorValue(int rowIndex);
562        public void startRealValues(int rowIndex);
563    }
564}