10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2013-2014, International Business Machines 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationsets.cpp 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2013feb09 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ucharstrie.h" 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h" 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h" 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ustringtrie.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsets.h" 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h" 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h" 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utf16collationiterator.h" 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utrie2.h" 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_BEGIN 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UBool U_CALLCONV 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusenumTailoredRange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) { 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce32 == Collation::FALLBACK_CE32) { 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; // fallback to base, not tailored 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TailoredSet *ts = (TailoredSet *)context; 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ts->handleCE32(start, end, ce32); 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_END 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::forData(const CollationData *d, UErrorCode &ec) { 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(ec)) { return; } 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = ec; // Preserve info & warning codes. 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius data = d; 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius baseData = d->base; 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(baseData != NULL); 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utrie2_enum(data->trie, NULL, enumTailoredRange, this); 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ec = errorCode; 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) { 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(ce32 != Collation::FALLBACK_CE32); 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isSpecialCE32(ce32)) { 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = data->getIndirectCE32(ce32); 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce32 == Collation::FALLBACK_CE32) { 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return U_SUCCESS(errorCode); 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(start)); 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Do not just continue if ce32 == baseCE32 because 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // contractions and expansions in different data objects 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // normally differ even if they have the same data offsets. 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isSelfContainedCE32(ce32) && Collation::isSelfContainedCE32(baseCE32)) { 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // fastpath 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce32 != baseCE32) { 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tailored->add(start); 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius compare(start, ce32, baseCE32); 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(++start <= end); 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return U_SUCCESS(errorCode); 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::compare(UChar32 c, uint32_t ce32, uint32_t baseCE32) { 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isPrefixCE32(ce32)) { 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *p = data->contexts + Collation::indexFromCE32(ce32); 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = data->getFinalCE32(CollationData::readCE32(p)); 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isPrefixCE32(baseCE32)) { 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32); 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q)); 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius comparePrefixes(c, p + 2, q + 2); 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addPrefixes(data, c, p + 2); 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(Collation::isPrefixCE32(baseCE32)) { 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32); 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q)); 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addPrefixes(baseData, c, q + 2); 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isContractionCE32(ce32)) { 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *p = data->contexts + Collation::indexFromCE32(ce32); 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) { 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = Collation::NO_CE32; 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = data->getFinalCE32(CollationData::readCE32(p)); 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isContractionCE32(baseCE32)) { 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32); 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((baseCE32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) { 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius baseCE32 = Collation::NO_CE32; 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q)); 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius compareContractions(c, p + 2, q + 2); 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addContractions(c, p + 2); 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(Collation::isContractionCE32(baseCE32)) { 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32); 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q)); 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addContractions(c, q + 2); 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t tag; 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isSpecialCE32(ce32)) { 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tag = Collation::tagFromCE32(ce32); 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(tag != Collation::PREFIX_TAG); 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(tag != Collation::CONTRACTION_TAG); 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Currently, the tailoring data builder does not write offset tags. 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // They might be useful for saving space, 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // but they would complicate the builder, 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and in tailorings we assume that performance of tailored characters is more important. 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(tag != Collation::OFFSET_TAG); 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tag = -1; 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t baseTag; 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isSpecialCE32(baseCE32)) { 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius baseTag = Collation::tagFromCE32(baseCE32); 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(baseTag != Collation::PREFIX_TAG); 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(baseTag != Collation::CONTRACTION_TAG); 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius baseTag = -1; 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Non-contextual mappings, expansions, etc. 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(baseTag == Collation::OFFSET_TAG) { 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We might be comparing a tailoring CE which is a copy of 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // a base offset-tag CE, via the [optimize [set]] syntax 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // or when a single-character mapping was copied for tailored contractions. 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Offset tags always result in long-primary CEs, 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // with common secondary/tertiary weights. 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!Collation::isLongPrimaryCE32(ce32)) { 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)]; 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE); 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::primaryFromLongPrimaryCE32(ce32) != p) { 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(tag != baseTag) { 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(tag == Collation::EXPANSION32_TAG) { 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32); 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = Collation::lengthFromCE32(ce32); 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint32_t *baseCE32s = baseData->ce32s + Collation::indexFromCE32(baseCE32); 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t baseLength = Collation::lengthFromCE32(baseCE32); 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(length != baseLength) { 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < length; ++i) { 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce32s[i] != baseCE32s[i]) { 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(tag == Collation::EXPANSION_TAG) { 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const int64_t *ces = data->ces + Collation::indexFromCE32(ce32); 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = Collation::lengthFromCE32(ce32); 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const int64_t *baseCEs = baseData->ces + Collation::indexFromCE32(baseCE32); 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t baseLength = Collation::lengthFromCE32(baseCE32); 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(length != baseLength) { 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < length; ++i) { 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ces[i] != baseCEs[i]) { 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(tag == Collation::HANGUL_TAG) { 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar jamos[3]; 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = Hangul::decompose(c, jamos); 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(tailored->contains(jamos[0]) || tailored->contains(jamos[1]) || 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (length == 3 && tailored->contains(jamos[2]))) { 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(ce32 != baseCE32) { 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius add(c); 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::comparePrefixes(UChar32 c, const UChar *p, const UChar *q) { 218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Parallel iteration over prefixes of both tables. 219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharsTrie::Iterator prefixes(p, 0, errorCode); 220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharsTrie::Iterator basePrefixes(q, 0, errorCode); 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString *tp = NULL; // Tailoring prefix. 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString *bp = NULL; // Base prefix. 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Use a string with a U+FFFF as the limit sentinel. 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // U+FFFF is untailorable and will not occur in prefixes. 225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString none((UChar)0xffff); 226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(tp == NULL) { 228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(prefixes.next(errorCode)) { 229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tp = &prefixes.getString(); 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tp = &none; 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(bp == NULL) { 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(basePrefixes.next(errorCode)) { 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius bp = &basePrefixes.getString(); 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius bp = &none; 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(tp == &none && bp == &none) { break; } 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t cmp = tp->compare(*bp); 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(cmp < 0) { 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // tp occurs in the tailoring but not in the base. 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addPrefix(data, *tp, c, (uint32_t)prefixes.getValue()); 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tp = NULL; 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(cmp > 0) { 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // bp occurs in the base but not in the tailoring. 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addPrefix(baseData, *bp, c, (uint32_t)basePrefixes.getValue()); 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius bp = NULL; 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setPrefix(*tp); 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius compare(c, (uint32_t)prefixes.getValue(), (uint32_t)basePrefixes.getValue()); 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius resetPrefix(); 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tp = NULL; 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius bp = NULL; 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::compareContractions(UChar32 c, const UChar *p, const UChar *q) { 263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Parallel iteration over suffixes of both tables. 264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharsTrie::Iterator suffixes(p, 0, errorCode); 265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharsTrie::Iterator baseSuffixes(q, 0, errorCode); 266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString *ts = NULL; // Tailoring suffix. 267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString *bs = NULL; // Base suffix. 268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Use a string with two U+FFFF as the limit sentinel. 269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // U+FFFF is untailorable and will not occur in contractions except maybe 270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // as a single suffix character for a root-collator boundary contraction. 271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString none((UChar)0xffff); 272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius none.append((UChar)0xffff); 273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ts == NULL) { 275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(suffixes.next(errorCode)) { 276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ts = &suffixes.getString(); 277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ts = &none; 279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(bs == NULL) { 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(baseSuffixes.next(errorCode)) { 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius bs = &baseSuffixes.getString(); 284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius bs = &none; 286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ts == &none && bs == &none) { break; } 289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t cmp = ts->compare(*bs); 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(cmp < 0) { 291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // ts occurs in the tailoring but not in the base. 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addSuffix(c, *ts); 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ts = NULL; 294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(cmp > 0) { 295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // bs occurs in the base but not in the tailoring. 296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addSuffix(c, *bs); 297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius bs = NULL; 298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius suffix = ts; 300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius compare(c, (uint32_t)suffixes.getValue(), (uint32_t)baseSuffixes.getValue()); 301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius suffix = NULL; 302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ts = NULL; 303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius bs = NULL; 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::addPrefixes(const CollationData *d, UChar32 c, const UChar *p) { 310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharsTrie::Iterator prefixes(p, 0, errorCode); 311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(prefixes.next(errorCode)) { 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addPrefix(d, prefixes.getString(), c, (uint32_t)prefixes.getValue()); 313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32) { 318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setPrefix(pfx); 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = d->getFinalCE32(ce32); 320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(Collation::isContractionCE32(ce32)) { 321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *p = d->contexts + Collation::indexFromCE32(ce32); 322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addContractions(c, p + 2); 323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tailored->add(UnicodeString(unreversedPrefix).append(c)); 325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius resetPrefix(); 326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::addContractions(UChar32 c, const UChar *p) { 330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharsTrie::Iterator suffixes(p, 0, errorCode); 331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(suffixes.next(errorCode)) { 332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addSuffix(c, suffixes.getString()); 333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::addSuffix(UChar32 c, const UnicodeString &sfx) { 338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tailored->add(UnicodeString(unreversedPrefix).append(c).append(sfx)); 339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::add(UChar32 c) { 343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(unreversedPrefix.isEmpty() && suffix == NULL) { 344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tailored->add(c); 345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString s(unreversedPrefix); 347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.append(c); 348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(suffix != NULL) { 349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.append(*suffix); 350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tailored->add(s); 352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::CESink::~CESink() {} 356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_BEGIN 358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UBool U_CALLCONV 360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusenumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) { 361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ContractionsAndExpansions *cne = (ContractionsAndExpansions *)context; 362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(cne->checkTailored == 0) { 363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // There is no tailoring. 364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No need to collect nor check the tailored set. 365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(cne->checkTailored < 0) { 366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Collect the set of code points with mappings in the tailoring data. 367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce32 == Collation::FALLBACK_CE32) { 368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; // fallback to base, not tailored 369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cne->tailored.add(start, end); 371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // checkTailored > 0: Exclude tailored ranges from the base data enumeration. 373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(start == end) { 374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(cne->tailored.contains(start)) { 375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(cne->tailored.containsSome(start, end)) { 378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cne->ranges.set(start, end).removeAll(cne->tailored); 379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t count = cne->ranges.getRangeCount(); 380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < count; ++i) { 381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cne->handleCE32(cne->ranges.getRangeStart(i), cne->ranges.getRangeEnd(i), ce32); 382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return U_SUCCESS(cne->errorCode); 384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cne->handleCE32(start, end, ce32); 386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return U_SUCCESS(cne->errorCode); 387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_END 390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) { 393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(ec)) { return; } 394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = ec; // Preserve info & warning codes. 395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Add all from the data, can be tailoring or base. 396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(d->base != NULL) { 397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkTailored = -1; 398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius data = d; 400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utrie2_enum(data->trie, NULL, enumCnERange, this); 401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(d->base == NULL || U_FAILURE(errorCode)) { 402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ec = errorCode; 403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Add all from the base data but only for un-tailored code points. 406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius tailored.freeze(); 407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkTailored = 1; 408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius data = d->base; 409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utrie2_enum(data->trie, NULL, enumCnERange, this); 410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ec = errorCode; 411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec) { 415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(ec)) { return; } 416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = ec; // Preserve info & warning codes. 417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t ce32 = d->getCE32(c); 418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce32 == Collation::FALLBACK_CE32) { 419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius d = d->base; 420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = d->getCE32(c); 421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius data = d; 423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius handleCE32(c, c, ce32); 424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ec = errorCode; 425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) { 429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) { 431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // !isSpecialCE32() 432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(sink != NULL) { 433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->handleCE(Collation::ceFromSimpleCE32(ce32)); 434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius switch(Collation::tagFromCE32(ce32)) { 438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::FALLBACK_TAG: 439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::RESERVED_TAG_3: 441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::BUILDER_DATA_TAG: 442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::LEAD_SURROGATE_TAG: 443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; } 444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::LONG_PRIMARY_TAG: 446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(sink != NULL) { 447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32)); 448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::LONG_SECONDARY_TAG: 451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(sink != NULL) { 452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32)); 453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::LATIN_EXPANSION_TAG: 456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(sink != NULL) { 457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces[0] = Collation::latinCE0FromCE32(ce32); 458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces[1] = Collation::latinCE1FromCE32(ce32); 459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->handleExpansion(ces, 2); 460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Optimization: If we have a prefix, 462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // then the relevant strings have been added already. 463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(unreversedPrefix.isEmpty()) { 464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addExpansions(start, end); 465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::EXPANSION32_TAG: 468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(sink != NULL) { 469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32); 470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = Collation::lengthFromCE32(ce32); 471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < length; ++i) { 472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces[i] = Collation::ceFromCE32(*ce32s++); 473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->handleExpansion(ces, length); 475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Optimization: If we have a prefix, 477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // then the relevant strings have been added already. 478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(unreversedPrefix.isEmpty()) { 479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addExpansions(start, end); 480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::EXPANSION_TAG: 483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(sink != NULL) { 484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = Collation::lengthFromCE32(ce32); 485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length); 486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Optimization: If we have a prefix, 488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // then the relevant strings have been added already. 489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(unreversedPrefix.isEmpty()) { 490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addExpansions(start, end); 491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::PREFIX_TAG: 494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius handlePrefixes(start, end, ce32); 495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::CONTRACTION_TAG: 497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius handleContractions(start, end, ce32); 498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::DIGIT_TAG: 500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Fetch the non-numeric-collation CE32 and continue. 501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = data->ce32s[Collation::indexFromCE32(ce32)]; 502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::U0000_TAG: 504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(start == 0 && end == 0); 505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Fetch the normal ce32 for U+0000 and continue. 506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = data->ce32s[0]; 507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::HANGUL_TAG: 509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(sink != NULL) { 510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // TODO: This should be optimized, 511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // especially if [start..end] is the complete Hangul range. (assert that) 512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTF16CollationIterator iter(data, FALSE, NULL, NULL, NULL); 513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar hangul[1] = { 0 }; 514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(UChar32 c = start; c <= end; ++c) { 515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius hangul[0] = (UChar)c; 516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter.setText(hangul, hangul + 1); 517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = iter.fetchCEs(errorCode); 518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Ignore the terminating non-CE. 520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE); 521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->handleExpansion(iter.getCEs(), length - 1); 522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Optimization: If we have a prefix, 525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // then the relevant strings have been added already. 526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(unreversedPrefix.isEmpty()) { 527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addExpansions(start, end); 528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::OFFSET_TAG: 531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Currently no need to send offset CEs to the sink. 532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case Collation::IMPLICIT_TAG: 534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Currently no need to send implicit CEs to the sink. 535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::handlePrefixes( 542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 start, UChar32 end, uint32_t ce32) { 543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *p = data->contexts + Collation::indexFromCE32(ce32); 544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = CollationData::readCE32(p); // Default if no prefix match. 545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius handleCE32(start, end, ce32); 546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!addPrefixes) { return; } 547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharsTrie::Iterator prefixes(p + 2, 0, errorCode); 548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(prefixes.next(errorCode)) { 549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setPrefix(prefixes.getString()); 550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Prefix/pre-context mappings are special kinds of contractions 551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // that always yield expansions. 552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addStrings(start, end, contractions); 553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addStrings(start, end, expansions); 554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius handleCE32(start, end, (uint32_t)prefixes.getValue()); 555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius resetPrefix(); 557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::handleContractions( 561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 start, UChar32 end, uint32_t ce32) { 562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *p = data->contexts + Collation::indexFromCE32(ce32); 563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) { 564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No match on the single code point. 565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We are underneath a prefix, and the default mapping is just 566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // a fallback to the mappings for a shorter prefix. 567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(!unreversedPrefix.isEmpty()); 568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = CollationData::readCE32(p); // Default if no suffix match. 570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(!Collation::isContractionCE32(ce32)); 571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius handleCE32(start, end, ce32); 572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharsTrie::Iterator suffixes(p + 2, 0, errorCode); 574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(suffixes.next(errorCode)) { 575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius suffix = &suffixes.getString(); 576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addStrings(start, end, contractions); 577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!unreversedPrefix.isEmpty()) { 578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addStrings(start, end, expansions); 579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius handleCE32(start, end, (uint32_t)suffixes.getValue()); 581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius suffix = NULL; 583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::addExpansions(UChar32 start, UChar32 end) { 587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(unreversedPrefix.isEmpty() && suffix == NULL) { 588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(expansions != NULL) { 589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius expansions->add(start, end); 590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addStrings(start, end, expansions); 593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::addStrings(UChar32 start, UChar32 end, UnicodeSet *set) { 598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(set == NULL) { return; } 599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString s(unreversedPrefix); 600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.append(start); 602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(suffix != NULL) { 603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.append(*suffix); 604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius set->add(s); 606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.truncate(unreversedPrefix.length()); 607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while(++start <= end); 608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 613