1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 1996-2014, International Business Machines 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationcompare.cpp 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2012feb14 with new and old collation code 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ucol.h" 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h" 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationcompare.h" 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationiterator.h" 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUCollationResult 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterator &right, 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationSettings &settings, 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode) { 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t options = settings.options; 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t variableTop; 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((options & CollationSettings::ALTERNATE_MASK) == 0) { 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius variableTop = 0; 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // +1 so that we can use "<" and primary ignorables test out early. 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius variableTop = settings.variableTop + 1; 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool anyVariable = FALSE; 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Fetch CEs, compare primaries, store secondary & tertiary weights. 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ALIGN_CODE(16); 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We fetch CEs until we get a non-ignorable primary or reach the end. 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t leftPrimary; 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = left.nextCE(errorCode); 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftPrimary = (uint32_t)(ce >> 32); 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY) { 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Variable CE, shift it to quaternary level. 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Ignore all following primary ignorables, and shift further variable CEs. 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius anyVariable = TRUE; 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Store only the primary of the variable CE. 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius left.setCurrentCE(ce & INT64_C(0xffffffff00000000)); 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce = left.nextCE(errorCode); 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftPrimary = (uint32_t)(ce >> 32); 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftPrimary == 0) { 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius left.setCurrentCE(0); 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(leftPrimary < variableTop && 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY); 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(leftPrimary == 0); 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t rightPrimary; 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = right.nextCE(errorCode); 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightPrimary = (uint32_t)(ce >> 32); 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rightPrimary < variableTop && rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY) { 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Variable CE, shift it to quaternary level. 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Ignore all following primary ignorables, and shift further variable CEs. 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius anyVariable = TRUE; 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Store only the primary of the variable CE. 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius right.setCurrentCE(ce & INT64_C(0xffffffff00000000)); 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce = right.nextCE(errorCode); 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightPrimary = (uint32_t)(ce >> 32); 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rightPrimary == 0) { 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius right.setCurrentCE(0); 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(rightPrimary < variableTop && 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY); 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(rightPrimary == 0); 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftPrimary != rightPrimary) { 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Return the primary difference, with script reordering. 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *reorderTable = settings.reorderTable; 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (reorderTable != NULL) { 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftPrimary = Collation::reorder(reorderTable, leftPrimary); 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightPrimary = Collation::reorder(reorderTable, rightPrimary); 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER; 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftPrimary == Collation::NO_CE_PRIMARY) { break; } 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Compare the buffered secondary & tertiary weights. 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We might skip the secondary level but continue with the case level 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // which is turned on separately. 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) { 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((options & CollationSettings::BACKWARD_SECONDARY) == 0) { 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t leftIndex = 0; 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rightIndex = 0; 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t leftSecondary; 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftSecondary = ((uint32_t)left.getCE(leftIndex++)) >> 16; 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(leftSecondary == 0); 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t rightSecondary; 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightSecondary = ((uint32_t)right.getCE(rightIndex++)) >> 16; 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(rightSecondary == 0); 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftSecondary != rightSecondary) { 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER; 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; } 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The backwards secondary level compares secondary weights backwards 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // within segments separated by the merge separator (U+FFFE, weight 02). 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t leftStart = 0; 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rightStart = 0; 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Find the merge separator or the NO_CE terminator. 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t leftLimit = leftStart; 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t leftLower32; 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while((leftLower32 = (uint32_t)left.getCE(leftLimit)) > 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::MERGE_SEPARATOR_LOWER32 || 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftLower32 == 0) { 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++leftLimit; 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rightLimit = rightStart; 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t rightLower32; 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while((rightLower32 = (uint32_t)right.getCE(rightLimit)) > 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::MERGE_SEPARATOR_LOWER32 || 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightLower32 == 0) { 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++rightLimit; 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Compare the segments. 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t leftIndex = leftLimit; 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rightIndex = rightLimit; 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t leftSecondary = 0; 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(leftSecondary == 0 && leftIndex > leftStart) { 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftSecondary = ((uint32_t)left.getCE(--leftIndex)) >> 16; 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rightSecondary = 0; 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(rightSecondary == 0 && rightIndex > rightStart) { 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightSecondary = ((uint32_t)right.getCE(--rightIndex)) >> 16; 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftSecondary != rightSecondary) { 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER; 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftSecondary == 0) { break; } 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Did we reach the end of either string? 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Both strings have the same number of merge separators, 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // or else there would have been a primary-level difference. 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit)); 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(left.getCE(leftLimit) == Collation::NO_CE) { break; } 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Skip both merge separators and continue. 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftStart = leftLimit + 1; 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightStart = rightLimit + 1; 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((options & CollationSettings::CASE_LEVEL) != 0) { 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t strength = CollationSettings::getStrength(options); 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t leftIndex = 0; 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rightIndex = 0; 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t leftCase, leftLower32, rightCase; 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(strength == UCOL_PRIMARY) { 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Primary+caseLevel: Ignore case level weights of primary ignorables. 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Otherwise we would get a-umlaut > a 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // which is not desirable for accent-insensitive sorting. 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Check for (lower 32 bits) == 0 as well because variable CEs are stored 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // with only primary weights. 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce; 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce = left.getCE(leftIndex++); 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftCase = (uint32_t)ce; 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while((uint32_t)(ce >> 32) == 0 || leftCase == 0); 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftLower32 = leftCase; 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftCase &= 0xc000; 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce = right.getCE(rightIndex++); 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightCase = (uint32_t)ce; 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while((uint32_t)(ce >> 32) == 0 || rightCase == 0); 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightCase &= 0xc000; 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Secondary+caseLevel: By analogy with the above, 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // ignore case level weights of secondary ignorables. 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Note: A tertiary CE has uppercase case bits (0.0.ut) 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // to keep tertiary+caseFirst well-formed. 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables. 219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Otherwise a tertiary CE's uppercase would be no greater than 220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // a primary/secondary CE's uppercase. 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // (See UCA well-formedness condition 2.) 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We could construct a special case weight higher than uppercase, 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // but it's simpler to always ignore case weights of secondary ignorables, 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // turning 0.0.ut into 0.0.0.t. 225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // (See LDML Collation, Case Parameters.) 226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftCase = (uint32_t)left.getCE(leftIndex++); 228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(leftCase <= 0xffff); 229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftLower32 = leftCase; 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftCase &= 0xc000; 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightCase = (uint32_t)right.getCE(rightIndex++); 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(rightCase <= 0xffff); 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightCase &= 0xc000; 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No need to handle NO_CE and MERGE_SEPARATOR specially: 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // There is one case weight for each previous-level weight, 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // so level length differences were handled there. 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftCase != rightCase) { 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((options & CollationSettings::UPPER_FIRST) == 0) { 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER; 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS; 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((leftLower32 >> 16) == Collation::NO_CE_WEIGHT16) { break; } 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; } 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options); 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t leftIndex = 0; 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rightIndex = 0; 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t anyQuaternaries = 0; 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t leftLower32, leftTertiary; 260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftLower32 = (uint32_t)left.getCE(leftIndex++); 262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius anyQuaternaries |= leftLower32; 263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT((leftLower32 & Collation::ONLY_TERTIARY_MASK) != 0 || 264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (leftLower32 & 0xc0c0) == 0); 265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftTertiary = leftLower32 & tertiaryMask; 266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(leftTertiary == 0); 267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t rightLower32, rightTertiary; 269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightLower32 = (uint32_t)right.getCE(rightIndex++); 271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius anyQuaternaries |= rightLower32; 272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 || 273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (rightLower32 & 0xc0c0) == 0); 274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightTertiary = rightLower32 & tertiaryMask; 275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(rightTertiary == 0); 276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftTertiary != rightTertiary) { 278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) { 279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Pass through NO_CE and MERGE_SEPARATOR 280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and keep real tertiary weights larger than the MERGE_SEPARATOR. 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut), 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // to keep tertiary CEs well-formed. 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Their case+tertiary weights must be greater than those of 284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // primary and secondary CEs. 285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) { 286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftLower32 > 0xffff) { 287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftTertiary ^= 0xc000; 288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftTertiary += 0x4000; 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rightTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) { 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rightLower32 > 0xffff) { 294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightTertiary ^= 0xc000; 295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightTertiary += 0x4000; 297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER; 301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; } 303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; } 305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!anyVariable && (anyQuaternaries & 0xc0) == 0) { 307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // If there are no "variable" CEs and no non-zero quaternary weights, 308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // then there are no quaternary differences. 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return UCOL_EQUAL; 310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftIndex = 0; 313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightIndex = 0; 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t leftQuaternary; 316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = left.getCE(leftIndex++); 318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftQuaternary = (uint32_t)ce & 0xffff; 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftQuaternary == 0) { 320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Variable primary or completely ignorable. 321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftQuaternary = (uint32_t)(ce >> 32); 322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(leftQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) { 323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Leave NO_CE or MERGE_SEPARATOR as is. 324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Regular CE, not tertiary ignorable. 326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Preserve the quaternary weight in bits 7..6. 327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftQuaternary |= 0xffffff3f; 328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(leftQuaternary == 0); 330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t rightQuaternary; 332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = right.getCE(rightIndex++); 334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightQuaternary = (uint32_t)ce & 0xffff; 335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rightQuaternary == 0) { 336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Variable primary or completely ignorable. 337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightQuaternary = (uint32_t)(ce >> 32); 338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(rightQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) { 339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Leave NO_CE or MERGE_SEPARATOR as is. 340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Regular CE, not tertiary ignorable. 342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Preserve the quaternary weight in bits 7..6. 343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightQuaternary |= 0xffffff3f; 344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(rightQuaternary == 0); 346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftQuaternary != rightQuaternary) { 348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Return the difference, with script reordering. 349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *reorderTable = settings.reorderTable; 350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (reorderTable != NULL) { 351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius leftQuaternary = Collation::reorder(reorderTable, leftQuaternary); 352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rightQuaternary = Collation::reorder(reorderTable, rightQuaternary); 353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER; 355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(leftQuaternary == Collation::NO_CE_WEIGHT16) { break; } 357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return UCOL_EQUAL; 359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 364