1/* GENERATED SOURCE. DO NOT MODIFY. */ 2// © 2016 and later: Unicode, Inc. and others. 3// License & terms of use: http://www.unicode.org/copyright.html#License 4/* 5 ****************************************************************************** 6 * Copyright (C) 1996-2011, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ****************************************************************************** 9 */ 10 11/** 12 * Store bits (Unicode character properties) in bit set vectors. 13 * 14 * This is a port of the C++ class UPropsVectors from ICU4C 15 * 16 * @author Shaopeng Jia 17 * @hide draft / provisional / internal are hidden on Android 18 */ 19 20package android.icu.impl; 21 22import java.util.Arrays; 23import java.util.Comparator; 24 25/** 26 * Unicode Properties Vectors associated with code point ranges. 27 * 28 * Rows of primitive integers in a contiguous array store the range limits and 29 * the properties vectors. 30 * 31 * In each row, row[0] contains the start code point and row[1] contains the 32 * limit code point, which is the start of the next range. 33 * 34 * Initially, there is only one range [0..0x110000] with values 0. 35 * 36 * It would be possible to store only one range boundary per row, but 37 * self-contained rows allow to later sort them by contents. 38 * @hide Only a subset of ICU is exposed in Android 39 */ 40public class PropsVectors { 41 private int v[]; 42 private int columns; // number of columns, plus two for start 43 // and limit values 44 private int maxRows; 45 private int rows; 46 private int prevRow; // search optimization: remember last row seen 47 private boolean isCompacted; 48 49 // internal function to compare elements in v and target. Return true iff 50 // elements in v starting from index1 to index1 + length - 1 51 // are exactly the same as elements in target 52 // starting from index2 to index2 + length - 1 53 private boolean areElementsSame(int index1, int[] target, int index2, 54 int length) { 55 for (int i = 0; i < length; ++i) { 56 if (v[index1 + i] != target[index2 + i]) { 57 return false; 58 } 59 } 60 return true; 61 } 62 63 // internal function which given rangeStart, returns 64 // index where v[index]<=rangeStart<v[index+1]. 65 // The returned index is a multiple of columns, and therefore 66 // points to the start of a row. 67 private int findRow(int rangeStart) { 68 int index = 0; 69 70 // check the vicinity of the last-seen row (start 71 // searching with an unrolled loop) 72 73 index = prevRow * columns; 74 if (rangeStart >= v[index]) { 75 if (rangeStart < v[index + 1]) { 76 // same row as last seen 77 return index; 78 } else { 79 index += columns; 80 if (rangeStart < v[index + 1]) { 81 ++prevRow; 82 return index; 83 } else { 84 index += columns; 85 if (rangeStart < v[index + 1]) { 86 prevRow += 2; 87 return index; 88 } else if ((rangeStart - v[index + 1]) < 10) { 89 // we are close, continue looping 90 prevRow += 2; 91 do { 92 ++prevRow; 93 index += columns; 94 } while (rangeStart >= v[index + 1]); 95 return index; 96 } 97 } 98 } 99 } else if (rangeStart < v[1]) { 100 // the very first row 101 prevRow = 0; 102 return 0; 103 } 104 105 // do a binary search for the start of the range 106 int start = 0; 107 int mid = 0; 108 int limit = rows; 109 while (start < limit - 1) { 110 mid = (start + limit) / 2; 111 index = columns * mid; 112 if (rangeStart < v[index]) { 113 limit = mid; 114 } else if (rangeStart < v[index + 1]) { 115 prevRow = mid; 116 return index; 117 } else { 118 start = mid; 119 } 120 } 121 122 // must be found because all ranges together always cover 123 // all of Unicode 124 prevRow = start; 125 index = start * columns; 126 return index; 127 } 128 129 /* 130 * Special pseudo code points for storing the initialValue and the 131 * errorValue which are used to initialize a Trie or similar. 132 */ 133 public final static int FIRST_SPECIAL_CP = 0x110000; 134 public final static int INITIAL_VALUE_CP = 0x110000; 135 public final static int ERROR_VALUE_CP = 0x110001; 136 public final static int MAX_CP = 0x110001; 137 138 public final static int INITIAL_ROWS = 1 << 12; 139 public final static int MEDIUM_ROWS = 1 << 16; 140 public final static int MAX_ROWS = MAX_CP + 1; 141 142 /* 143 * Constructor. 144 * @param numOfColumns Number of value integers (32-bit int) per row. 145 */ 146 public PropsVectors(int numOfColumns) { 147 if (numOfColumns < 1) { 148 throw new IllegalArgumentException("numOfColumns need to be no " 149 + "less than 1; but it is " + numOfColumns); 150 } 151 columns = numOfColumns + 2; // count range start and limit columns 152 v = new int[INITIAL_ROWS * columns]; 153 maxRows = INITIAL_ROWS; 154 rows = 2 + (MAX_CP - FIRST_SPECIAL_CP); 155 prevRow = 0; 156 isCompacted = false; 157 v[0] = 0; 158 v[1] = 0x110000; 159 int index = columns; 160 for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) { 161 v[index] = cp; 162 v[index + 1] = cp + 1; 163 index += columns; 164 } 165 } 166 167 /* 168 * In rows for code points [start..end], select the column, reset the mask 169 * bits and set the value bits (ANDed with the mask). 170 * 171 * @throws IllegalArgumentException 172 * 173 * @throws IllegalStateException 174 * 175 * @throws IndexOutOfBoundsException 176 */ 177 public void setValue(int start, int end, int column, int value, int mask) { 178 if (start < 0 || start > end || end > MAX_CP || column < 0 179 || column >= (columns - 2)) { 180 throw new IllegalArgumentException(); 181 } 182 if (isCompacted) { 183 throw new IllegalStateException("Shouldn't be called after" 184 + "compact()!"); 185 } 186 187 int firstRow, lastRow; 188 int limit = end + 1; 189 boolean splitFirstRow, splitLastRow; 190 // skip range start and limit columns 191 column += 2; 192 value &= mask; 193 194 // find the rows whose ranges overlap with the input range 195 // find the first and last row, always successful 196 firstRow = findRow(start); 197 lastRow = findRow(end); 198 199 /* 200 * Rows need to be split if they partially overlap with the input range 201 * (only possible for the first and last rows) and if their value 202 * differs from the input value. 203 */ 204 splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask)); 205 splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask)); 206 207 // split first/last rows if necessary 208 if (splitFirstRow || splitLastRow) { 209 int rowsToExpand = 0; 210 if (splitFirstRow) { 211 ++rowsToExpand; 212 } 213 if (splitLastRow) { 214 ++rowsToExpand; 215 } 216 int newMaxRows = 0; 217 if ((rows + rowsToExpand) > maxRows) { 218 if (maxRows < MEDIUM_ROWS) { 219 newMaxRows = MEDIUM_ROWS; 220 } else if (maxRows < MAX_ROWS) { 221 newMaxRows = MAX_ROWS; 222 } else { 223 throw new IndexOutOfBoundsException( 224 "MAX_ROWS exceeded! Increase it to a higher value" + 225 "in the implementation"); 226 } 227 int[] temp = new int[newMaxRows * columns]; 228 System.arraycopy(v, 0, temp, 0, rows * columns); 229 v = temp; 230 maxRows = newMaxRows; 231 } 232 233 // count the number of row cells to move after the last row, 234 // and move them 235 int count = (rows * columns) - (lastRow + columns); 236 if (count > 0) { 237 System.arraycopy(v, lastRow + columns, v, lastRow 238 + (1 + rowsToExpand) * columns, count); 239 } 240 rows += rowsToExpand; 241 242 // split the first row, and move the firstRow pointer 243 // to the second part 244 if (splitFirstRow) { 245 // copy all affected rows up one and move the lastRow pointer 246 count = lastRow - firstRow + columns; 247 System.arraycopy(v, firstRow, v, firstRow + columns, count); 248 lastRow += columns; 249 250 // split the range and move the firstRow pointer 251 v[firstRow + 1] = v[firstRow + columns] = start; 252 firstRow += columns; 253 } 254 255 // split the last row 256 if (splitLastRow) { 257 // copy the last row data 258 System.arraycopy(v, lastRow, v, lastRow + columns, columns); 259 260 // split the range and move the firstRow pointer 261 v[lastRow + 1] = v[lastRow + columns] = limit; 262 } 263 } 264 265 // set the "row last seen" to the last row for the range 266 prevRow = lastRow / columns; 267 268 // set the input value in all remaining rows 269 firstRow += column; 270 lastRow += column; 271 mask = ~mask; 272 for (;;) { 273 v[firstRow] = (v[firstRow] & mask) | value; 274 if (firstRow == lastRow) { 275 break; 276 } 277 firstRow += columns; 278 } 279 } 280 281 /* 282 * Always returns 0 if called after compact(). 283 */ 284 public int getValue(int c, int column) { 285 if (isCompacted || c < 0 || c > MAX_CP || column < 0 286 || column >= (columns - 2)) { 287 return 0; 288 } 289 int index = findRow(c); 290 return v[index + 2 + column]; 291 } 292 293 /* 294 * Returns an array which contains value elements 295 * in row rowIndex. 296 * 297 * @throws IllegalStateException 298 * @throws IllegalArgumentException 299 */ 300 public int[] getRow(int rowIndex) { 301 if (isCompacted) { 302 throw new IllegalStateException( 303 "Illegal Invocation of the method after compact()"); 304 } 305 if (rowIndex < 0 || rowIndex > rows) { 306 throw new IllegalArgumentException("rowIndex out of bound!"); 307 } 308 int[] rowToReturn = new int[columns - 2]; 309 System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0, 310 columns - 2); 311 return rowToReturn; 312 } 313 314 /* 315 * Returns an int which is the start codepoint 316 * in row rowIndex. 317 * 318 * @throws IllegalStateException 319 * 320 * @throws IllegalArgumentException 321 */ 322 public int getRowStart(int rowIndex) { 323 if (isCompacted) { 324 throw new IllegalStateException( 325 "Illegal Invocation of the method after compact()"); 326 } 327 if (rowIndex < 0 || rowIndex > rows) { 328 throw new IllegalArgumentException("rowIndex out of bound!"); 329 } 330 return v[rowIndex * columns]; 331 } 332 333 /* 334 * Returns an int which is the limit codepoint 335 * minus 1 in row rowIndex. 336 * 337 * @throws IllegalStateException 338 * 339 * @throws IllegalArgumentException 340 */ 341 public int getRowEnd(int rowIndex) { 342 if (isCompacted) { 343 throw new IllegalStateException( 344 "Illegal Invocation of the method after compact()"); 345 } 346 if (rowIndex < 0 || rowIndex > rows) { 347 throw new IllegalArgumentException("rowIndex out of bound!"); 348 } 349 return v[rowIndex * columns + 1] - 1; 350 } 351 352 /* 353 * Compact the vectors: 354 * - modify the memory 355 * - keep only unique vectors 356 * - store them contiguously from the beginning of the memory 357 * - for each (non-unique) row, call the respective function in 358 * CompactHandler 359 * 360 * The handler's rowIndex is the index of the row in the compacted 361 * memory block. Therefore, it starts at 0 increases in increments of the 362 * columns value. 363 * 364 * In a first phase, only special values are delivered (each exactly once). 365 * Then CompactHandler::startRealValues() is called 366 * where rowIndex is the length of the compacted array. 367 * Then, in the second phase, the CompactHandler::setRowIndexForRange() is 368 * called for each row of real values. 369 */ 370 public void compact(CompactHandler compactor) { 371 if (isCompacted) { 372 return; 373 } 374 375 // Set the flag now: Sorting and compacting destroys the builder 376 // data structure. 377 isCompacted = true; 378 int valueColumns = columns - 2; // not counting start & limit 379 380 // sort the properties vectors to find unique vector values 381 Integer[] indexArray = new Integer[rows]; 382 for (int i = 0; i < rows; ++i) { 383 indexArray[i] = Integer.valueOf(columns * i); 384 } 385 386 Arrays.sort(indexArray, new Comparator<Integer>() { 387 @Override 388 public int compare(Integer o1, Integer o2) { 389 int indexOfRow1 = o1.intValue(); 390 int indexOfRow2 = o2.intValue(); 391 int count = columns; // includes start/limit columns 392 393 // start comparing after start/limit 394 // but wrap around to them 395 int index = 2; 396 do { 397 if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) { 398 return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1 399 : 1; 400 } 401 if (++index == columns) { 402 index = 0; 403 } 404 } while (--count > 0); 405 406 return 0; 407 } 408 }); 409 410 /* 411 * Find and set the special values. This has to do almost the same work 412 * as the compaction below, to find the indexes where the special-value 413 * rows will move. 414 */ 415 int count = -valueColumns; 416 for (int i = 0; i < rows; ++i) { 417 int start = v[indexArray[i].intValue()]; 418 419 // count a new values vector if it is different 420 // from the current one 421 if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v, 422 indexArray[i-1].intValue() + 2, valueColumns)) { 423 count += valueColumns; 424 } 425 426 if (start == INITIAL_VALUE_CP) { 427 compactor.setRowIndexForInitialValue(count); 428 } else if (start == ERROR_VALUE_CP) { 429 compactor.setRowIndexForErrorValue(count); 430 } 431 } 432 433 // count is at the beginning of the last vector, 434 // add valueColumns to include that last vector 435 count += valueColumns; 436 437 // Call the handler once more to signal the start of 438 // delivering real values. 439 compactor.startRealValues(count); 440 441 /* 442 * Move vector contents up to a contiguous array with only unique 443 * vector values, and call the handler function for each vector. 444 * 445 * This destroys the Properties Vector structure and replaces it 446 * with an array of just vector values. 447 */ 448 int[] temp = new int[count]; 449 count = -valueColumns; 450 for (int i = 0; i < rows; ++i) { 451 int start = v[indexArray[i].intValue()]; 452 int limit = v[indexArray[i].intValue() + 1]; 453 454 // count a new values vector if it is different 455 // from the current one 456 if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, 457 temp, count, valueColumns)) { 458 count += valueColumns; 459 System.arraycopy(v, indexArray[i].intValue() + 2, temp, count, 460 valueColumns); 461 } 462 463 if (start < FIRST_SPECIAL_CP) { 464 compactor.setRowIndexForRange(start, limit - 1, count); 465 } 466 } 467 v = temp; 468 469 // count is at the beginning of the last vector, 470 // add one to include that last vector 471 rows = count / valueColumns + 1; 472 } 473 474 /* 475 * Get the vectors array after calling compact(). 476 * 477 * @throws IllegalStateException 478 */ 479 public int[] getCompactedArray() { 480 if (!isCompacted) { 481 throw new IllegalStateException( 482 "Illegal Invocation of the method before compact()"); 483 } 484 return v; 485 } 486 487 /* 488 * Get the number of rows for the compacted array. 489 * 490 * @throws IllegalStateException 491 */ 492 public int getCompactedRows() { 493 if (!isCompacted) { 494 throw new IllegalStateException( 495 "Illegal Invocation of the method before compact()"); 496 } 497 return rows; 498 } 499 500 /* 501 * Get the number of columns for the compacted array. 502 * 503 * @throws IllegalStateException 504 */ 505 public int getCompactedColumns() { 506 if (!isCompacted) { 507 throw new IllegalStateException( 508 "Illegal Invocation of the method before compact()"); 509 } 510 return columns - 2; 511 } 512 513 /* 514 * Call compact(), create a IntTrie with indexes into the compacted 515 * vectors array. 516 */ 517 public IntTrie compactToTrieWithRowIndexes() { 518 PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler(); 519 compact(compactor); 520 return compactor.builder.serialize(new DefaultGetFoldedValue( 521 compactor.builder), new DefaultGetFoldingOffset()); 522 } 523 524 // inner class implementation of Trie.DataManipulate 525 private static class DefaultGetFoldingOffset implements Trie.DataManipulate { 526 @Override 527 public int getFoldingOffset(int value) { 528 return value; 529 } 530 } 531 532 // inner class implementation of TrieBuilder.DataManipulate 533 private static class DefaultGetFoldedValue implements 534 TrieBuilder.DataManipulate { 535 private IntTrieBuilder builder; 536 537 public DefaultGetFoldedValue(IntTrieBuilder inBuilder) { 538 builder = inBuilder; 539 } 540 541 @Override 542 public int getFoldedValue(int start, int offset) { 543 int initialValue = builder.m_initialValue_; 544 int limit = start + 0x400; 545 while (start < limit) { 546 boolean[] inBlockZero = new boolean[1]; 547 int value = builder.getValue(start, inBlockZero); 548 if (inBlockZero[0]) { 549 start += TrieBuilder.DATA_BLOCK_LENGTH; 550 } else if (value != initialValue) { 551 return offset; 552 } else { 553 ++start; 554 } 555 } 556 return 0; 557 } 558 } 559 560 public static interface CompactHandler { 561 public void setRowIndexForRange(int start, int end, int rowIndex); 562 public void setRowIndexForInitialValue(int rowIndex); 563 public void setRowIndexForErrorValue(int rowIndex); 564 public void startRealValues(int rowIndex); 565 } 566}