CollationCompare.java revision 7935b1839a081ed19ae0d33029ad3c09632a2caa
17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 1996-2014, International Business Machines 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Corporation and others. All Rights Reserved. 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * CollationCompare.java, ported from collationcompare.h/.cpp 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * C++ version created on: 2012feb14 with new and old collation code 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * created by: Markus W. Scherer 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class CollationCompare /* all static */ { 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static int compareUpToQuaternary(CollationIterator left, CollationIterator right, 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationSettings settings) { 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int options = settings.options; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long variableTop; 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((options & CollationSettings.ALTERNATE_MASK) == 0) { 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert variableTop = 0; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // +1 so that we can use "<" and primary ignorables test out early. 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert variableTop = settings.variableTop + 1; 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean anyVariable = false; 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Fetch CEs, compare primaries, store secondary & tertiary weights. 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We fetch CEs until we get a non-ignorable primary or reach the end. 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long leftPrimary; 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long ce = left.nextCE(); 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftPrimary = ce >>> 32; 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY) { 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Variable CE, shift it to quaternary level. 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Ignore all following primary ignorables, and shift further variable CEs. 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert anyVariable = true; 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Store only the primary of the variable CE. 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert left.setCurrentCE(ce & 0xffffffff00000000L); 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ce = left.nextCE(); 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftPrimary = ce >>> 32; 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftPrimary == 0) { 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert left.setCurrentCE(0); 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY); 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (leftPrimary == 0); 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long rightPrimary; 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long ce = right.nextCE(); 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightPrimary = ce >>> 32; 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY) { 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Variable CE, shift it to quaternary level. 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Ignore all following primary ignorables, and shift further variable CEs. 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert anyVariable = true; 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Store only the primary of the variable CE. 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert right.setCurrentCE(ce & 0xffffffff00000000L); 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ce = right.nextCE(); 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightPrimary = ce >>> 32; 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (rightPrimary == 0) { 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert right.setCurrentCE(0); 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY); 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (rightPrimary == 0); 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftPrimary != rightPrimary) { 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Return the primary difference, with script reordering. 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte[] reorderTable = settings.reorderTable; 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (reorderTable != null) { 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftPrimary = Collation.reorder(reorderTable, leftPrimary); 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightPrimary = Collation.reorder(reorderTable, rightPrimary); 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER; 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftPrimary == Collation.NO_CE_PRIMARY) { 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Compare the buffered secondary & tertiary weights. 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We might skip the secondary level but continue with the case level 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // which is turned on separately. 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (CollationSettings.getStrength(options) >= Collator.SECONDARY) { 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) { 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftIndex = 0; 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightIndex = 0; 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftSecondary; 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftSecondary = ((int) left.getCE(leftIndex++)) >>> 16; 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (leftSecondary == 0); 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightSecondary; 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightSecondary = ((int) right.getCE(rightIndex++)) >>> 16; 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (rightSecondary == 0); 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftSecondary != rightSecondary) { 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER; 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftSecondary == Collation.NO_CE_WEIGHT16) { 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The backwards secondary level compares secondary weights backwards 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // within segments separated by the merge separator (U+FFFE, weight 02). 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftStart = 0; 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightStart = 0; 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Find the merge separator or the NO_CE terminator. 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftLimit = leftStart; 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long leftLower32; 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while ((leftLower32 = left.getCE(leftLimit) & 0xffffffffL) > Collation.MERGE_SEPARATOR_LOWER32 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert || leftLower32 == 0) { 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++leftLimit; 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightLimit = rightStart; 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long rightLower32; 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while ((rightLower32 = right.getCE(rightLimit) & 0xffffffffL) > Collation.MERGE_SEPARATOR_LOWER32 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert || rightLower32 == 0) { 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++rightLimit; 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Compare the segments. 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftIndex = leftLimit; 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightIndex = rightLimit; 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftSecondary = 0; 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (leftSecondary == 0 && leftIndex > leftStart) { 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftSecondary = ((int) left.getCE(--leftIndex)) >>> 16; 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightSecondary = 0; 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (rightSecondary == 0 && rightIndex > rightStart) { 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightSecondary = ((int) right.getCE(--rightIndex)) >>> 16; 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftSecondary != rightSecondary) { 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER; 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftSecondary == 0) { 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Did we reach the end of either string? 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Both strings have the same number of merge separators, 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // or else there would have been a primary-level difference. 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert (left.getCE(leftLimit) == right.getCE(rightLimit)); 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (left.getCE(leftLimit) == Collation.NO_CE) { 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Skip both merge separators and continue. 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftStart = leftLimit + 1; 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightStart = rightLimit + 1; 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((options & CollationSettings.CASE_LEVEL) != 0) { 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int strength = CollationSettings.getStrength(options); 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftIndex = 0; 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightIndex = 0; 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftCase, leftLower32, rightCase; 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (strength == Collator.PRIMARY) { 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Primary+caseLevel: Ignore case level weights of primary ignorables. 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Otherwise we would get a-umlaut > a 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // which is not desirable for accent-insensitive sorting. 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Check for (lower 32 bits) == 0 as well because variable CEs are stored 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // with only primary weights. 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long ce; 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ce = left.getCE(leftIndex++); 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftCase = (int) ce; 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while ((ce >>> 32) == 0 || leftCase == 0); 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftLower32 = leftCase; 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftCase &= 0xc000; 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ce = right.getCE(rightIndex++); 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightCase = (int) ce; 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while ((ce >>> 32) == 0 || rightCase == 0); 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightCase &= 0xc000; 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Secondary+caseLevel: By analogy with the above, 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // ignore case level weights of secondary ignorables. 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Note: A tertiary CE has uppercase case bits (0.0.ut) 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // to keep tertiary+caseFirst well-formed. 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables. 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Otherwise a tertiary CE's uppercase would be no greater than 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // a primary/secondary CE's uppercase. 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (See UCA well-formedness condition 2.) 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We could construct a special case weight higher than uppercase, 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // but it's simpler to always ignore case weights of secondary ignorables, 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // turning 0.0.ut into 0.0.0.t. 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (See LDML Collation, Case Parameters.) 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftCase = (int) left.getCE(leftIndex++); 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while ((leftCase & 0xffff0000) == 0); 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftLower32 = leftCase; 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftCase &= 0xc000; 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightCase = (int) right.getCE(rightIndex++); 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while ((rightCase & 0xffff0000) == 0); 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightCase &= 0xc000; 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // No need to handle NO_CE and MERGE_SEPARATOR specially: 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // There is one case weight for each previous-level weight, 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // so level length differences were handled there. 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftCase != rightCase) { 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((options & CollationSettings.UPPER_FIRST) == 0) { 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (leftCase < rightCase) ? Collation.LESS : Collation.GREATER; 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (leftCase < rightCase) ? Collation.GREATER : Collation.LESS; 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((leftLower32 >>> 16) == Collation.NO_CE_WEIGHT16) { 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (CollationSettings.getStrength(options) <= Collator.SECONDARY) { 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Collation.EQUAL; 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int tertiaryMask = CollationSettings.getTertiaryMask(options); 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftIndex = 0; 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightIndex = 0; 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int anyQuaternaries = 0; 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leftLower32, leftTertiary; 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftLower32 = (int) left.getCE(leftIndex++); 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert anyQuaternaries |= leftLower32; 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert ((leftLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (leftLower32 & 0xc0c0) == 0); 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftTertiary = leftLower32 & tertiaryMask; 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (leftTertiary == 0); 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rightLower32, rightTertiary; 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightLower32 = (int) right.getCE(rightIndex++); 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert anyQuaternaries |= rightLower32; 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert ((rightLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (rightLower32 & 0xc0c0) == 0); 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightTertiary = rightLower32 & tertiaryMask; 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (rightTertiary == 0); 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftTertiary != rightTertiary) { 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (CollationSettings.sortsTertiaryUpperCaseFirst(options)) { 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Pass through NO_CE and MERGE_SEPARATOR 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and keep real tertiary weights larger than the MERGE_SEPARATOR. 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut), 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // to keep tertiary CEs well-formed. 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Their case+tertiary weights must be greater than those of 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // primary and secondary CEs. 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftTertiary > Collation.MERGE_SEPARATOR_WEIGHT16) { 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((leftLower32 & 0xffff0000) != 0) { 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftTertiary ^= 0xc000; 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftTertiary += 0x4000; 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (rightTertiary > Collation.MERGE_SEPARATOR_WEIGHT16) { 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((rightLower32 & 0xffff0000) != 0) { 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightTertiary ^= 0xc000; 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightTertiary += 0x4000; 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER; 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftTertiary == Collation.NO_CE_WEIGHT16) { 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (CollationSettings.getStrength(options) <= Collator.TERTIARY) { 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Collation.EQUAL; 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!anyVariable && (anyQuaternaries & 0xc0) == 0) { 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If there are no "variable" CEs and no non-zero quaternary weights, 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // then there are no quaternary differences. 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Collation.EQUAL; 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftIndex = 0; 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightIndex = 0; 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long leftQuaternary; 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long ce = left.getCE(leftIndex++); 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftQuaternary = ce & 0xffff; 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftQuaternary == 0) { 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Variable primary or completely ignorable. 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftQuaternary = ce >>> 32; 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (leftQuaternary <= Collation.MERGE_SEPARATOR_WEIGHT16) { 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Leave NO_CE or MERGE_SEPARATOR as is. 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Regular CE, not tertiary ignorable. 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Preserve the quaternary weight in bits 7..6. 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftQuaternary |= 0xffffff3fL; 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (leftQuaternary == 0); 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long rightQuaternary; 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long ce = right.getCE(rightIndex++); 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightQuaternary = ce & 0xffff; 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (rightQuaternary == 0) { 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Variable primary or completely ignorable. 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightQuaternary = ce >>> 32; 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (rightQuaternary <= Collation.MERGE_SEPARATOR_WEIGHT16) { 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Leave NO_CE or MERGE_SEPARATOR as is. 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Regular CE, not tertiary ignorable. 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Preserve the quaternary weight in bits 7..6. 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightQuaternary |= 0xffffff3fL; 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (rightQuaternary == 0); 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftQuaternary != rightQuaternary) { 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Return the difference, with script reordering. 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte[] reorderTable = settings.reorderTable; 3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (reorderTable != null) { 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert leftQuaternary = Collation.reorder(reorderTable, leftQuaternary); 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rightQuaternary = Collation.reorder(reorderTable, rightQuaternary); 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER; 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (leftQuaternary == Collation.NO_CE_WEIGHT16) { 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Collation.EQUAL; 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 358