/* GENERATED SOURCE. DO NOT MODIFY. */
/*
 *******************************************************************************
 * Copyright (C) 2012-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationKeys.java, ported from collationkeys.h/.cpp
 *
 * C++ version created on: 2012sep02
 * created by: Markus W. Scherer
 */

package android.icu.impl.coll;

import android.icu.text.Collator;

/**
 * Static helpers that turn the collation elements (CEs) produced by a
 * {@link CollationIterator} into sort key bytes for the primary through
 * quaternary levels, and write them to a {@link SortKeyByteSink}.
 *
 * @hide Only a subset of ICU is exposed in Android
 */
public final class CollationKeys /* all methods are static */ {

    // Java porting note: C++ SortKeyByteSink class extends a common class ByteSink,
    // which is not available in Java. We don't need a super class created for implementing
    // collation features.
    /**
     * Destination for sort key bytes, backed by a byte array.
     *
     * <p>The sink keeps counting appended bytes even when they do not fit into the
     * buffer: {@link #NumberOfBytesAppended()} can therefore exceed the buffer length,
     * which is exactly the condition reported by {@link #Overflowed()}.
     * Subclasses decide how (and whether) the buffer grows by implementing
     * {@link #AppendBeyondCapacity} and {@link #Resize}.
     */
    public static abstract class SortKeyByteSink {
        /** The destination buffer; its length is the current capacity. */
        protected byte[] buffer_;
        // protected int capacity_; == buffer_.length
        /** Number of bytes appended so far; may be larger than buffer_.length after an overflow. */
        private int appended_ = 0;
        // not used in Java -- private int ignore_ = 0;

        /**
         * @param dest
         *            the initial destination buffer; it is stored, not copied
         */
        public SortKeyByteSink(byte[] dest) {
            buffer_ = dest;
        }

        /**
         * Replaces the destination buffer and the appended-bytes count in one step.
         *
         * Needed in Java for when we write to the buffer directly.
         * In C++, the SortKeyByteSink is a subclass of ByteSink and lower-level code can write to that.
         * TODO: Can we make Java SortKeyByteSink have-a ByteArrayWrapper and write through to it?
         * Or maybe create interface ByteSink, have SortKeyByteSink implement it, and have BOCSU write to that??
         *
         * @param dest
         *            the new destination buffer; it is stored, not copied
         * @param app
         *            the new number of bytes considered appended
         */
        public void setBufferAndAppended(byte[] dest, int app) {
            buffer_ = dest;
            appended_ = app;
        }

        /* not used in Java -- public void IgnoreBytes(int numIgnore) {
            ignore_ = numIgnore;
        } */

        /**
         * Appends the first n bytes of the given array. Does nothing if n is not
         * positive or if bytes is null.
         *
         * @param bytes
         *            the array of bytes
         * @param n
         *            the number of bytes to be appended, taken from the start of the array
         */
        public void Append(byte[] bytes, int n) {
            if (n <= 0 || bytes == null) {
                return;
            }

            /* not used in Java -- if (ignore_ > 0) {
                int ignoreRest = ignore_ - n;
                if (ignoreRest >= 0) {
                    ignore_ = ignoreRest;
                    return;
                } else {
                    start = ignore_;
                    n = -ignoreRest;
                    ignore_ = 0;
                }
            } */

            // Remember the write position, then count the new bytes unconditionally
            // so that the total is correct even if they do not fit.
            int length = appended_;
            appended_ += n;

            // Negative when the sink has already overflowed, which forces the else branch.
            int available = buffer_.length - length;
            if (n <= available) {
                System.arraycopy(bytes, 0, buffer_, length, n);
            } else {
                // Note: "length" is the count before this call, not including n.
                AppendBeyondCapacity(bytes, 0, n, length);
            }
        }

        /**
         * Appends a single byte (the low 8 bits of b).
         * The byte is stored only if there is room or {@link #Resize} succeeds;
         * the appended-bytes count is incremented either way.
         *
         * @param b
         *            the byte value to append
         */
        public void Append(int b) {
            /* not used in Java -- if (ignore_ > 0) {
                --ignore_;
            } else */ {
                if (appended_ < buffer_.length || Resize(1, appended_)) {
                    buffer_[appended_] = (byte) b;
                }
                ++appended_;
            }
        }

        // Java porting note: This method is not used by collator implementation.
        //
        // virtual char *GetAppendBuffer(int min_capacity,
        // int desired_capacity_hint,
        // char *scratch, int scratch_capacity,
        // int *result_capacity);

        /**
         * @return the total number of bytes appended so far, including any that
         *         did not fit into the buffer
         */
        public int NumberOfBytesAppended() {
            return appended_;
        }

        /**
         * @return the buffer length minus the number of bytes appended; negative
         *         once the sink has overflowed
         */
        public int GetRemainingCapacity() {
            return /* not used in Java -- ignore_ + */ buffer_.length - appended_;
        }

        /**
         * @return true if more bytes have been appended than the buffer can hold
         */
        public boolean Overflowed() {
            return appended_ > buffer_.length;
        }

        /* not used in Java -- public boolean IsOk() {
            return true;
        } */

        /**
         * Called by {@link #Append(byte[], int)} when the bytes do not fit into the
         * remaining buffer space.
         *
         * @param bytes
         *            the array of bytes
         * @param start
         *            the start index within the array to be appended
         * @param n
         *            the number of bytes to be appended
         * @param length
         *            as passed by {@link #Append(byte[], int)}: the number of bytes that had
         *            been appended before this call, i.e. the offset at which the new bytes
         *            belong (it does not include n)
         */
        protected abstract void AppendBeyondCapacity(byte[] bytes, int start, int n, int length);

        /**
         * Called by {@link #Append(int)} when the buffer is full.
         *
         * @param appendCapacity
         *            the number of additional bytes needed (1 when called from Append(int))
         * @param length
         *            as passed by {@link #Append(int)}: the number of bytes appended so far
         * @return true if the caller may now store into buffer_ at index length;
         *         false leaves the byte counted but not stored
         */
        protected abstract boolean Resize(int appendCapacity, int length);
    }

    /**
     * Hook that lets a caller stop sort key generation before a given level is written.
     */
    public static class LevelCallback {
        /**
         * @param level
         *            The next level about to be written to the ByteSink.
         * @return true if the level is to be written (the base class implementation always returns
         *         true)
         */
        boolean needToWrite(int level) {
            return true;
        }
    }
    /** Shared instance of the base callback, which allows every level to be written. */
    public static final LevelCallback SIMPLE_LEVEL_FALLBACK = new LevelCallback();

    /**
     * Growable byte buffer that collects the bytes of one non-primary sort key level
     * while the CEs are being processed.
     */
    private static final class SortKeyLevel {
        private static final int INITIAL_CAPACITY = 40;

        byte[] buffer = new byte[INITIAL_CAPACITY];
        /** Number of valid bytes in buffer. */
        int len = 0;
        // not used in Java -- private static final boolean ok = true; // In C++ "ok" is reset when memory allocations fail.

        SortKeyLevel() {
        }

        /* not used in Java -- boolean isOk() {
            return ok;
        } */

        boolean isEmpty() {
            return len == 0;
        }

        int length() {
            return len;
        }

        // Java porting note: Java uses this instead of C++ operator [] overload
        // uint8_t operator[](int index)
        byte getAt(int index) {
            return buffer[index];
        }

        /**
         * @return the internal array itself (not a copy); only the first length() bytes
         *         are valid, and the reference becomes stale once the buffer grows
         */
        byte[] data() {
            return buffer;
        }

        /** Appends the low 8 bits of b. */
        void appendByte(int b) {
            if (len < buffer.length || ensureCapacity(1)) {
                buffer[len++] = (byte) b;
            }
        }

        /**
         * Appends a 16-bit weight, high byte first. The low byte is omitted when it is zero.
         */
        void appendWeight16(int w) {
            assert ((w & 0xffff) != 0);
            byte b0 = (byte) (w >>> 8);
            byte b1 = (byte) w;
            int appendLength = (b1 == 0) ? 1 : 2;
            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
                buffer[len++] = b0;
                if (b1 != 0) {
                    buffer[len++] = b1;
                }
            }
        }

        /**
         * Appends the low 32 bits of w, most significant byte first. The lead byte is
         * always written; output stops before the first zero byte among the remaining three.
         */
        void appendWeight32(long w) {
            assert (w != 0);
            byte[] bytes = new byte[] { (byte) (w >>> 24), (byte) (w >>> 16), (byte) (w >>> 8),
                    (byte) w };
            int appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
                buffer[len++] = bytes[0];
                if (bytes[1] != 0) {
                    buffer[len++] = bytes[1];
                    if (bytes[2] != 0) {
                        buffer[len++] = bytes[2];
                        if (bytes[3] != 0) {
                            buffer[len++] = bytes[3];
                        }
                    }
                }
            }
        }

        /**
         * Appends a 16-bit weight with its two bytes swapped (low byte first), or just the
         * high byte when the low byte is zero. Used for the backward secondary level, whose
         * bytes are reversed in place afterwards, which restores the high-then-low order.
         */
        void appendReverseWeight16(int w) {
            assert ((w & 0xffff) != 0);
            byte b0 = (byte) (w >>> 8);
            byte b1 = (byte) w;
            int appendLength = (b1 == 0) ? 1 : 2;
            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
                if (b1 == 0) {
                    buffer[len++] = b0;
                } else {
                    buffer[len] = b1;
                    buffer[len + 1] = b0;
                    len += 2;
                }
            }
        }

        // Appends all but the last byte to the sink. The last byte should be the 01 terminator.
        void appendTo(SortKeyByteSink sink) {
            assert (len > 0 && buffer[len - 1] == 1);
            sink.Append(buffer, len - 1);
        }

        /**
         * Grows the buffer so that at least appendCapacity more bytes fit.
         * The new capacity is the largest of: twice the current capacity,
         * len + 2 * appendCapacity, and 200.
         *
         * @return always true in Java (the C++ version can fail on allocation errors)
         */
        private boolean ensureCapacity(int appendCapacity) {
            /* not used in Java -- if (!ok) {
                return false;
            } */
            int newCapacity = 2 * buffer.length;
            int altCapacity = len + 2 * appendCapacity;
            if (newCapacity < altCapacity) {
                newCapacity = altCapacity;
            }
            if (newCapacity < 200) {
                newCapacity = 200;
            }
            byte[] newbuf = new byte[newCapacity];
            System.arraycopy(buffer, 0, newbuf, 0, len);
            buffer = newbuf;

            return true;
        }
    }

    /**
     * @return a new, empty SortKeyLevel if the bit(s) in level are set in levels,
     *         otherwise null (callers must test the same flag before using the result)
     */
    private static SortKeyLevel getSortKeyLevel(int levels, int level) {
        return (levels & level) != 0 ? new SortKeyLevel() : null;
    }

    private CollationKeys() {
    } // no instantiation

    // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
    private static final int SEC_COMMON_LOW = Collation.COMMON_BYTE;
    private static final int SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
    static final int SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; // read by CollationDataReader
    private static final int SEC_COMMON_MAX_COUNT = 0x21;

    // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
    private static final int CASE_LOWER_FIRST_COMMON_LOW = 1;
    private static final int CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
    private static final int CASE_LOWER_FIRST_COMMON_HIGH = 13;
    private static final int CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;

    // Case level, upperFirst: Compress up to 13 common weights as 3..15.
    private static final int CASE_UPPER_FIRST_COMMON_LOW = 3;
    @SuppressWarnings("unused")
    private static final int CASE_UPPER_FIRST_COMMON_HIGH = 15;
    private static final int CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;

    // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
    private static final int TER_ONLY_COMMON_LOW = Collation.COMMON_BYTE;
    private static final int TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
    private static final int TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
    private static final int TER_ONLY_COMMON_MAX_COUNT = 0x61;

    // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
    private static final int TER_LOWER_FIRST_COMMON_LOW = Collation.COMMON_BYTE;
    private static final int TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
    private static final int TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
    private static final int TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;

    // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
    private static final int TER_UPPER_FIRST_COMMON_LOW = Collation.COMMON_BYTE + 0x80;
    private static final int TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
    private static final int TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
    private static final int TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;

    // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
    private static final int QUAT_COMMON_LOW = 0x1c;
    private static final int QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
    private static final int QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
    private static final int QUAT_COMMON_MAX_COUNT = 0x71;
    // Primary weights shifted to quaternary level must be encoded with
    // a lead byte below the common-weight compression range.
    private static final int QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b

    /**
     * Map from collation strength (UColAttributeValue) to a mask of Collation.Level bits up to that
     * strength, excluding the CASE_LEVEL which is independent of the strength, and excluding
     * IDENTICAL_LEVEL which this function does not write.
     * Indexed by the value returned from CollationSettings.getStrength(options).
     */
    private static final int levelMasks[] = new int[] {
        2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
        6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
        0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
        0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0,
        0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
    };

    /**
     * Writes the sort key bytes for minLevel up to the iterator data's strength. Optionally writes
     * the case level. Stops writing levels when callback.needToWrite(level) returns false.
     * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE.
     *
     * <p>Primary bytes go to the sink directly while the CEs are read. Bytes for the
     * secondary, case, tertiary and quaternary levels are collected in per-level buffers
     * during the same pass and appended to the sink after the loop, in that order.
     *
     * @param iter
     *            supplies the CEs via nextCE(); reading continues until the NO_CE marker
     * @param compressibleBytes
     *            indexed by the lead byte of the un-reordered primary weight; true where
     *            primary lead-byte compression applies
     * @param settings
     *            provides the options bit set, the variable top, and primary reordering
     * @param sink
     *            receives the sort key bytes
     * @param minLevel
     *            the lowest level number to write; all levels below it are masked off
     * @param callback
     *            consulted before each of the secondary, case, tertiary and quaternary levels
     *            is appended; a false return ends the method. It is not consulted for the
     *            primary level.
     * @param preflight
     *            if false, the method returns as soon as the sink has overflowed after writing
     *            a primary weight; if true, processing continues so that the full length is
     *            counted
     */
    public static void writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes,
            CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback,
            boolean preflight) {

        int options = settings.options;
        // Set of levels to process and write.
        int levels = levelMasks[CollationSettings.getStrength(options)];
        if ((options & CollationSettings.CASE_LEVEL) != 0) {
            levels |= Collation.CASE_LEVEL_FLAG;
        }
        // Minus the levels below minLevel.
        levels &= ~((1 << minLevel) - 1);
        if (levels == 0) {
            return;
        }

        long variableTop;
        if ((options & CollationSettings.ALTERNATE_MASK) == 0) {
            // With variableTop == 0 the "p < variableTop" tests below never succeed,
            // so no CE is treated as variable.
            variableTop = 0;
        } else {
            // +1 so that we can use "<" and primary ignorables test out early.
            variableTop = settings.variableTop + 1;
        }

        int tertiaryMask = CollationSettings.getTertiaryMask(options);

        // Scratch space for the second to fourth primary bytes.
        byte[] p234 = new byte[3];
        // Each of these is null when its level is not in "levels"; every use below
        // is guarded by a test of the corresponding flag or counter.
        SortKeyLevel cases = getSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG);
        SortKeyLevel secondaries = getSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG);
        SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG);
        SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG);

        long prevReorderedPrimary = 0; // 0==no compression
        // Run lengths of consecutive common weights not yet written, per level.
        int commonCases = 0;
        int commonSecondaries = 0;
        int commonTertiaries = 0;
        int commonQuaternaries = 0;

        // State for the backward secondary level: the last non-common secondary weight
        // and the start offset of the current segment to be reversed.
        int prevSecondary = 0;
        int secSegmentStart = 0;

        for (;;) {
            // No need to keep all CEs in the buffer when we write a sort key.
            iter.clearCEsIfNoneRemaining();
            long ce = iter.nextCE();
            // Upper 32 bits: primary weight. Lower 32 bits are split into levels below.
            long p = ce >>> 32;
            if (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY) {
                // Variable CE, shift it to quaternary level.
                // Ignore all following primary ignorables, and shift further variable CEs.
                if (commonQuaternaries != 0) {
                    --commonQuaternaries;
                    while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
                        quaternaries.appendByte(QUAT_COMMON_MIDDLE);
                        commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
                    }
                    // Shifted primary weights are lower than the common weight.
                    quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
                    commonQuaternaries = 0;
                }
                do {
                    if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
                        if (settings.hasReordering()) {
                            p = settings.reorder(p);
                        }
                        if (((int) p >>> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
                            // Prevent shifted primary lead bytes from
                            // overlapping with the common compression range.
                            quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
                        }
                        quaternaries.appendWeight32(p);
                    }
                    // Skip the primary ignorables that follow the variable CE.
                    do {
                        ce = iter.nextCE();
                        p = ce >>> 32;
                    } while (p == 0);
                } while (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY);
            }
            // ce could be primary ignorable, or NO_CE, or the merge separator,
            // or a regular primary CE, but it is not variable.
            // If ce==NO_CE, then write nothing for the primary level but
            // terminate compression on all levels and then exit the loop.
            if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) {
                // Test the un-reordered primary for compressibility.
                boolean isCompressible = compressibleBytes[(int) p >>> 24];
                if(settings.hasReordering()) {
                    p = settings.reorder(p);
                }
                int p1 = (int) p >>> 24;
                // Write the lead byte unless it repeats the previous compressible lead byte.
                if (!isCompressible || p1 != ((int) prevReorderedPrimary >>> 24)) {
                    if (prevReorderedPrimary != 0) {
                        // Terminate the previous run of compressed primaries.
                        if (p < prevReorderedPrimary) {
                            // No primary compression terminator
                            // at the end of the level or merged segment.
                            if (p1 > Collation.MERGE_SEPARATOR_BYTE) {
                                sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE);
                            }
                        } else {
                            sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE);
                        }
                    }
                    sink.Append(p1);
                    if(isCompressible) {
                        prevReorderedPrimary = p;
                    } else {
                        prevReorderedPrimary = 0;
                    }
                }
                // Write the remaining primary bytes, stopping before the first zero byte.
                byte p2 = (byte) (p >>> 16);
                if (p2 != 0) {
                    p234[0] = p2;
                    p234[1] = (byte) (p >>> 8);
                    p234[2] = (byte) p;
                    sink.Append(p234, (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3);
                }
                // Optimization for internalNextSortKeyPart():
                // When the primary level overflows we can stop because we need not
                // calculate (preflight) the whole sort key length.
                if (!preflight && sink.Overflowed()) {
                    // not used in Java -- if (!sink.IsOk()) {
                    // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
                    // C implementation. IsOk() in Java always returns true, so this
                    // is dead code.
                    return;
                }
            }

            int lower32 = (int) ce;
            if (lower32 == 0) {
                continue;
            } // completely ignorable, no secondary/case/tertiary/quaternary

            if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
                int s = lower32 >>> 16; // 16 bits
                if (s == 0) {
                    // secondary ignorable
                } else if (s == Collation.COMMON_WEIGHT16 &&
                        ((options & CollationSettings.BACKWARD_SECONDARY) == 0 ||
                        p != Collation.MERGE_SEPARATOR_PRIMARY)) {
                    // s is a common secondary weight, and
                    // backwards-secondary is off or the ce is not the merge separator.
                    ++commonSecondaries;
                } else if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) {
                    // Forward secondary level: flush the pending run of common weights,
                    // then write this weight.
                    if (commonSecondaries != 0) {
                        --commonSecondaries;
                        while (commonSecondaries >= SEC_COMMON_MAX_COUNT) {
                            secondaries.appendByte(SEC_COMMON_MIDDLE);
                            commonSecondaries -= SEC_COMMON_MAX_COUNT;
                        }
                        int b;
                        if (s < Collation.COMMON_WEIGHT16) {
                            b = SEC_COMMON_LOW + commonSecondaries;
                        } else {
                            b = SEC_COMMON_HIGH - commonSecondaries;
                        }
                        secondaries.appendByte(b);
                        commonSecondaries = 0;
                    }
                    secondaries.appendWeight16(s);
                } else {
                    // Backward secondary level.
                    if (commonSecondaries != 0) {
                        --commonSecondaries;
                        // Append reverse weights. The level will be re-reversed later.
                        int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
                        int b;
                        if (prevSecondary < Collation.COMMON_WEIGHT16) {
                            b = SEC_COMMON_LOW + remainder;
                        } else {
                            b = SEC_COMMON_HIGH - remainder;
                        }
                        secondaries.appendByte(b);
                        commonSecondaries -= remainder;
                        // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
                        while (commonSecondaries > 0) { // same as >= SEC_COMMON_MAX_COUNT
                            secondaries.appendByte(SEC_COMMON_MIDDLE);
                            commonSecondaries -= SEC_COMMON_MAX_COUNT;
                        }
                        // commonSecondaries == 0
                    }
                    if (0 < p && p <= Collation.MERGE_SEPARATOR_PRIMARY) {
                        // The backwards secondary level compares secondary weights backwards
                        // within segments separated by the merge separator (U+FFFE).
                        // Reverse the bytes of the segment just completed, in place.
                        byte[] secs = secondaries.data();
                        int last = secondaries.length() - 1;
                        while (secSegmentStart < last) {
                            byte b = secs[secSegmentStart];
                            secs[secSegmentStart++] = secs[last];
                            secs[last--] = b;
                        }
                        secondaries.appendByte(p == Collation.NO_CE_PRIMARY ?
                                Collation.LEVEL_SEPARATOR_BYTE : Collation.MERGE_SEPARATOR_BYTE);
                        prevSecondary = 0;
                        secSegmentStart = secondaries.length();
                    } else {
                        secondaries.appendReverseWeight16(s);
                        prevSecondary = s;
                    }
                }
            }

            if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
                if ((CollationSettings.getStrength(options) == Collator.PRIMARY) ? p == 0
                        : (lower32 >>> 16) == 0) {
                    // Primary+caseLevel: Ignore case level weights of primary ignorables.
                    // Otherwise: Ignore case level weights of secondary ignorables.
                    // For details see the comments in the CollationCompare class.
                } else {
                    int c = (lower32 >>> 8) & 0xff; // case bits & tertiary lead byte
                    assert ((c & 0xc0) != 0xc0);
                    if ((c & 0xc0) == 0 && c > Collation.LEVEL_SEPARATOR_BYTE) {
                        ++commonCases;
                    } else {
                        if ((options & CollationSettings.UPPER_FIRST) == 0) {
                            // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14,
                            // upper=15.
                            // If there are only common (=lowest) weights in the whole level,
                            // then we need not write anything.
                            // Level length differences are handled already on the next-higher level.
                            if (commonCases != 0 &&
                                    (c > Collation.LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
                                --commonCases;
                                while (commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
                                    cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
                                    commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
                                }
                                int b;
                                if (c <= Collation.LEVEL_SEPARATOR_BYTE) {
                                    b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
                                } else {
                                    b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
                                }
                                cases.appendByte(b << 4);
                                commonCases = 0;
                            }
                            if (c > Collation.LEVEL_SEPARATOR_BYTE) {
                                c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >>> 6)) << 4; // 14 or 15
                            }
                        } else {
                            // upperFirst: Compress common weights to nibbles 3..15, mixed=2,
                            // upper=1.
                            // The compressed common case weights only go up from the "low" value
                            // because with upperFirst the common weight is the highest one.
                            if (commonCases != 0) {
                                --commonCases;
                                while (commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
                                    cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
                                    commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
                                }
                                cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
                                commonCases = 0;
                            }
                            if (c > Collation.LEVEL_SEPARATOR_BYTE) {
                                c = (CASE_UPPER_FIRST_COMMON_LOW - (c >>> 6)) << 4; // 2 or 1
                            }
                        }
                        // c is a separator byte 01,
                        // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
                        cases.appendByte(c);
                    }
                }
            }

            if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
                int t = lower32 & tertiaryMask;
                assert ((lower32 & 0xc000) != 0xc000);
                if (t == Collation.COMMON_WEIGHT16) {
                    ++commonTertiaries;
                } else if ((tertiaryMask & 0x8000) == 0) {
                    // Tertiary weights without case bits.
                    // Move lead bytes 06..3F to C6..FF for a large common-weight range.
                    if (commonTertiaries != 0) {
                        --commonTertiaries;
                        while (commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
                            tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
                            commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
                        }
                        int b;
                        if (t < Collation.COMMON_WEIGHT16) {
                            b = TER_ONLY_COMMON_LOW + commonTertiaries;
                        } else {
                            b = TER_ONLY_COMMON_HIGH - commonTertiaries;
                        }
                        tertiaries.appendByte(b);
                        commonTertiaries = 0;
                    }
                    if (t > Collation.COMMON_WEIGHT16) {
                        t += 0xc000;
                    }
                    tertiaries.appendWeight16(t);
                } else if ((options & CollationSettings.UPPER_FIRST) == 0) {
                    // Tertiary weights with caseFirst=lowerFirst.
                    // Move lead bytes 06..BF to 46..FF for the common-weight range.
                    if (commonTertiaries != 0) {
                        --commonTertiaries;
                        while (commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
                            tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
                            commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
                        }
                        int b;
                        if (t < Collation.COMMON_WEIGHT16) {
                            b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
                        } else {
                            b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
                        }
                        tertiaries.appendByte(b);
                        commonTertiaries = 0;
                    }
                    if (t > Collation.COMMON_WEIGHT16) {
                        t += 0x4000;
                    }
                    tertiaries.appendWeight16(t);
                } else {
                    // Tertiary weights with caseFirst=upperFirst.
                    // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
                    // to keep tertiary CEs well-formed.
                    // Their case+tertiary weights must be greater than those of
                    // primary and secondary CEs.
                    //
                    // Separator 01 -> 01 (unchanged)
                    // Lowercase 02..04 -> 82..84 (includes uncased)
                    // Common weight 05 -> 85..C5 (common-weight compression range)
                    // Lowercase 06..3F -> C6..FF
                    // Mixed case 42..7F -> 42..7F
                    // Uppercase 82..BF -> 02..3F
                    // Tertiary CE 86..BF -> C6..FF
                    if (t <= Collation.NO_CE_WEIGHT16) {
                        // Keep separators unchanged.
                    } else if ((lower32 >>> 16) != 0) {
                        // Invert case bits of primary & secondary CEs.
                        t ^= 0xc000;
                        if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
                            t -= 0x4000;
                        }
                    } else {
                        // Keep uppercase bits of tertiary CEs.
                        assert (0x8600 <= t && t <= 0xbfff);
                        t += 0x4000;
                    }
                    // Unlike the two branches above, t is remapped before the pending
                    // common weights are flushed, because the low/high choice below
                    // compares the remapped value.
                    if (commonTertiaries != 0) {
                        --commonTertiaries;
                        while (commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
                            tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
                            commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
                        }
                        int b;
                        if (t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
                            b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
                        } else {
                            b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
                        }
                        tertiaries.appendByte(b);
                        commonTertiaries = 0;
                    }
                    tertiaries.appendWeight16(t);
                }
            }

            if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
                int q = lower32 & 0xffff;
                if ((q & 0xc0) == 0 && q > Collation.NO_CE_WEIGHT16) {
                    ++commonQuaternaries;
                } else if (q == Collation.NO_CE_WEIGHT16
                        && (options & CollationSettings.ALTERNATE_MASK) == 0
                        && quaternaries.isEmpty()) {
                    // If alternate=non-ignorable and there are only common quaternary weights,
                    // then we need not write anything.
                    // The only weights greater than the merge separator and less than the common
                    // weight
                    // are shifted primary weights, which are not generated for
                    // alternate=non-ignorable.
                    // There are also exactly as many quaternary weights as tertiary weights,
                    // so level length differences are handled already on tertiary level.
                    // Any above-common quaternary weight will compare greater regardless.
                    quaternaries.appendByte(Collation.LEVEL_SEPARATOR_BYTE);
                } else {
                    if (q == Collation.NO_CE_WEIGHT16) {
                        q = Collation.LEVEL_SEPARATOR_BYTE;
                    } else {
                        // Map bits 7..6 of the weight to one of the bytes FC..FF.
                        q = 0xfc + ((q >>> 6) & 3);
                    }
                    if (commonQuaternaries != 0) {
                        --commonQuaternaries;
                        while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
                            quaternaries.appendByte(QUAT_COMMON_MIDDLE);
                            commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
                        }
                        int b;
                        if (q < QUAT_COMMON_LOW) {
                            b = QUAT_COMMON_LOW + commonQuaternaries;
                        } else {
                            b = QUAT_COMMON_HIGH - commonQuaternaries;
                        }
                        quaternaries.appendByte(b);
                        commonQuaternaries = 0;
                    }
                    quaternaries.appendByte(q);
                }
            }

            if ((lower32 >>> 24) == Collation.LEVEL_SEPARATOR_BYTE) {
                break;
            } // ce == NO_CE
        }

        // Append the beyond-primary levels.
        // Each level is preceded by a level separator; the callback may end the key early.
        // not used in Java -- boolean ok = true;
        if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
            if (!callback.needToWrite(Collation.SECONDARY_LEVEL)) {
                return;
            }
            // not used in Java -- ok &= secondaries.isOk();
            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
            secondaries.appendTo(sink);
        }

        if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
            if (!callback.needToWrite(Collation.CASE_LEVEL)) {
                return;
            }
            // not used in Java -- ok &= cases.isOk();
            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
            // Write pairs of nibbles as bytes, except separator bytes as themselves.
            // NOTE(review): the loop below treats every collected byte as a left-shifted
            // nibble (see the assert); no separate separator handling is visible here.
            int length = cases.length() - 1; // Ignore the trailing NO_CE.
            // b holds a pending high nibble (already shifted left), or 0 if none is pending.
            byte b = 0;
            for (int i = 0; i < length; ++i) {
                byte c = cases.getAt(i);
                assert ((c & 0xf) == 0 && c != 0);
                if (b == 0) {
                    b = c;
                } else {
                    // Mask after the arithmetic shift to drop sign-extension bits.
                    sink.Append(b | ((c >> 4) & 0xf));
                    b = 0;
                }
            }
            // Odd number of nibbles: write the last one with a zero low nibble.
            if (b != 0) {
                sink.Append(b);
            }
        }

        if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
            if (!callback.needToWrite(Collation.TERTIARY_LEVEL)) {
                return;
            }
            // not used in Java -- ok &= tertiaries.isOk();
            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
            tertiaries.appendTo(sink);
        }

        if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
            if (!callback.needToWrite(Collation.QUATERNARY_LEVEL)) {
                return;
            }
            // not used in Java -- ok &= quaternaries.isOk();
            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
            quaternaries.appendTo(sink);
        }

        // not used in Java -- if (!ok || !sink.IsOk()) {
        // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
        // C implementation. IsOk() in Java always returns true, so this
        // is dead code.
    }
}