1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 1996-2011, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: ucol.cpp 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Modification history 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Comments 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 1996-1999 various members of ICU team maintained C API for collation framework 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 02/16/2001 synwee Added internal method getPrevSpecialCE 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/01/2001 synwee Added maxexpansion functionality. 
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestream.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/coleitr.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unorm.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_imp.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "bocsu.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unorm_it.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h" 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "utracimp.h" 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "putilimp.h" 
40c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "uassert.h" 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LAST_BYTE_MASK_ 0xFF 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SECOND_LAST_BYTE_SHIFT_ 8 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define ZERO_CC_LIMIT_ 0xC0 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// this is static pointer to the normalizer fcdTrieIndex 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// it is always the same between calls to u_cleanup 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and therefore writing to it is not synchronized. 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// It is cleaned in ucol_cleanup 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const uint16_t *fcdTrieIndex=NULL; 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Code points at fcdHighStart and above have a zero FCD value. 
61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic UChar32 fcdHighStart = 0; 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// These are values from UCA required for 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// implicit generation and supressing sort key compression 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// they should regularly be in the UCA, but if one 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// is running without UCA, it could be a problem 6727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const int32_t maxRegularPrimary = 0x7A; 68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t minImplicitPrimary = 0xE0; 69c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t maxImplicitPrimary = 0xE4; 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool U_CALLCONV 73c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruucol_cleanup(void) 74c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 75c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fcdTrieIndex = NULL; 76c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t U_CALLCONV 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_getFoldingOffset(uint32_t data) { 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(data&0xFFFFFF); 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8627f654740f2a26ad62a5c155af9199af9e69b889claireho// init FCD data 8727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline 8827f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool initializeFCD(UErrorCode *status) { 8927f654740f2a26ad62a5c155af9199af9e69b889claireho if (fcdTrieIndex != NULL) { 9027f654740f2a26ad62a5c155af9199af9e69b889claireho return TRUE; 9127f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 9227f654740f2a26ad62a5c155af9199af9e69b889claireho // The result is constant, until the library is reloaded. 9327f654740f2a26ad62a5c155af9199af9e69b889claireho fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 9427f654740f2a26ad62a5c155af9199af9e69b889claireho ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup); 9527f654740f2a26ad62a5c155af9199af9e69b889claireho return U_SUCCESS(*status); 9627f654740f2a26ad62a5c155af9199af9e69b889claireho } 9727f654740f2a26ad62a5c155af9199af9e69b889claireho} 9827f654740f2a26ad62a5c155af9199af9e69b889claireho 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruinline void IInit_collIterate(const UCollator *collator, const UChar *sourceString, 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sourceLen, collIterate *s, 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) 103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (s)->string = (s)->pos = sourceString; 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->origFlags = 0; 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->flags = 0; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sourceLen >= 0) { 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->flags |= UCOL_ITER_HASLEN; 
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->endp = (UChar *)sourceString+sourceLen; 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* change to enable easier checking for end of string for fcdpositon */ 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->endp = NULL; 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->extendCEs = NULL; 116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->extendCEsSize = 0; 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->CEpos = (s)->toReturn = (s)->CEs; 118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->offsetBuffer = NULL; 119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->offsetBufferSize = 0; 120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->offsetReturn = (s)->offsetStore = NULL; 121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->offsetRepeatCount = (s)->offsetRepeatValue = 0; 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->coll = (collator); 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (s)->nfd = Normalizer2Factory::getNFDInstance(*status); 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->fcdPosition = 0; 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(collator->normalizationMode == UCOL_ON) { 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->flags |= UCOL_ITER_NORM; 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) { 129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->flags |= UCOL_HIRAGANA_Q; 
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->iterator = NULL; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //(s)->iteratorIndex = 0; 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_init_collIterate(const UCollator *collator, const UChar *sourceString, 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sourceLen, collIterate *s, 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Out-of-line version for use from other files. */ 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(collator, sourceString, sourceLen, s, status); 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI collIterate * U_EXPORT2 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_new_collIterate(UErrorCode *status) { 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collIterate *s = new collIterate; 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(s == NULL) { 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_MEMORY_ALLOCATION_ERROR; 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return s; 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
15650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI void U_EXPORT2 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_delete_collIterate(collIterate *s) { 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete s; 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 16150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UBool U_EXPORT2 16250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_collIterateAtEnd(collIterate *s) { 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return s == NULL || s->pos == s->endp; 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Backup the state of the collIterate struct data 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate to backup 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param backup storage 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void backupState(const collIterate *data, collIterateState *backup) 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->fcdPosition = data->fcdPosition; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->flags = data->flags; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->origFlags = data->origFlags; 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->pos = data->pos; 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backup->bufferaddress = data->writableBuffer.getBuffer(); 17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backup->buffersize = 
data->writableBuffer.length(); 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->iteratorMove = 0; 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->iteratorIndex = 0; 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->iterator != NULL) { 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER_CURRENT); 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->iteratorIndex = data->iterator->getState(data->iterator); 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // no we try to fixup if we're using a normalizing iterator and we get UITER_NO_STATE 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(backup->iteratorIndex == UITER_NO_STATE) { 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((backup->iteratorIndex = data->iterator->getState(data->iterator)) == UITER_NO_STATE) { 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->iteratorMove++; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->move(data->iterator, -1, UITER_CURRENT); 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT); 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Loads the state into the collIterate struct data 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate to backup 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 
@param backup storage 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param forwards boolean to indicate if forwards iteration is used, 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* false indicates backwards iteration 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void loadState(collIterate *data, const collIterateState *backup, 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool forwards) 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags = backup->flags; 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->origFlags = backup->origFlags; 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->iterator != NULL) { 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO); 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->setState(data->iterator, backup->iteratorIndex, &status); 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(backup->iteratorMove != 0) { 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT); 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = backup->pos; 218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & UCOL_ITER_INNORMBUF) && 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
data->writableBuffer.getBuffer() != backup->bufferaddress) { 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this is when a new buffer has been reallocated and we'll have to 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru calculate the new position. 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru note the new buffer has to contain the contents of the old buffer. 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forwards) { 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.getTerminatedBuffer() + 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (data->pos - backup->bufferaddress); 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* backwards direction */ 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t temp = backup->buffersize - 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int32_t)(data->pos - backup->bufferaddress); 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writableBuffer.length() - temp); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this is alittle tricky. 
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if we are initially not in the normalization buffer, even if we 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalize in the later stage, the data in the buffer will be 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ignored, since we skip back up to the data string. 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru however if we are already in the normalization buffer, any 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru further normalization will pull data into the normalization 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer and modify the fcdPosition. 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru since we are keeping the data in the buffer for use, the 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fcdPosition can not be reverted back. 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru arrgghh.... 
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition = backup->fcdPosition; 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoreallocCEs(collIterate *data, int32_t newCapacity) { 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t *oldCEs = data->extendCEs; 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(oldCEs == NULL) { 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCEs = data->CEs; 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length = data->CEpos - oldCEs; 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t *newCEs = (uint32_t *)uprv_malloc(newCapacity * 4); 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(newCEs == NULL) { 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_memcpy(newCEs, oldCEs, length * 4); 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(data->extendCEs); 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->extendCEs = newCEs; 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->extendCEsSize = newCapacity; 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->CEpos = newCEs + length; 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoincreaseCEsCapacity(collIterate *data) { 
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldCapacity; 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(data->extendCEs != NULL) { 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity = data->extendCEsSize; 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity = LENGTHOF(data->CEs); 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return reallocCEs(data, 2 * oldCapacity); 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoensureCEsCapacity(collIterate *data, int32_t minCapacity) { 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldCapacity; 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(data->extendCEs != NULL) { 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity = data->extendCEsSize; 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity = LENGTHOF(data->CEs); 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(minCapacity <= oldCapacity) { 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity *= 2; 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return reallocCEs(data, minCapacity > oldCapacity ? 
minCapacity : oldCapacity); 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29927f654740f2a26ad62a5c155af9199af9e69b889clairehovoid collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) { 30027f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_FAILURE(errorCode)) { 30127f654740f2a26ad62a5c155af9199af9e69b889claireho return; 30227f654740f2a26ad62a5c155af9199af9e69b889claireho } 30327f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t length = offsetStore == NULL ? 0 : (int32_t)(offsetStore - offsetBuffer); 30427f654740f2a26ad62a5c155af9199af9e69b889claireho if(length >= offsetBufferSize) { 30527f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t newCapacity = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE; 30627f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t *newBuffer = reinterpret_cast<int32_t *>(uprv_malloc(newCapacity * 4)); 30727f654740f2a26ad62a5c155af9199af9e69b889claireho if(newBuffer == NULL) { 30827f654740f2a26ad62a5c155af9199af9e69b889claireho errorCode = U_MEMORY_ALLOCATION_ERROR; 30927f654740f2a26ad62a5c155af9199af9e69b889claireho return; 31027f654740f2a26ad62a5c155af9199af9e69b889claireho } 31127f654740f2a26ad62a5c155af9199af9e69b889claireho if(length > 0) { 31227f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_memcpy(newBuffer, offsetBuffer, length * 4); 31327f654740f2a26ad62a5c155af9199af9e69b889claireho } 31427f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(offsetBuffer); 31527f654740f2a26ad62a5c155af9199af9e69b889claireho offsetBuffer = newBuffer; 31627f654740f2a26ad62a5c155af9199af9e69b889claireho offsetStore = offsetBuffer + length; 31727f654740f2a26ad62a5c155af9199af9e69b889claireho offsetBufferSize = newCapacity; 31827f654740f2a26ad62a5c155af9199af9e69b889claireho } 31927f654740f2a26ad62a5c155af9199af9e69b889claireho *offsetStore++ = offset; 32027f654740f2a26ad62a5c155af9199af9e69b889claireho} 
32127f654740f2a26ad62a5c155af9199af9e69b889claireho 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collIter_eos() 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Checks for a collIterate being positioned at the end of 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* its source string. 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_eos(collIterate *s) { 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s->flags & UCOL_USE_ITERATOR) { 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !(s->iterator->hasNext(s->iterator)); 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((s->flags & UCOL_ITER_HASLEN) == 0 && *s->pos != 0) { 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Null terminated string, but not at null, so not at end. 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Whether in main or normalization buffer doesn't matter. 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // String with length. Can't be in normalization buffer, which is always 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // null termintated. 
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s->flags & UCOL_ITER_HASLEN) { 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (s->pos == s->endp); 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are at a null termination, could be either normalization buffer or main string. 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((s->flags & UCOL_ITER_INNORMBUF) == 0) { 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At null at end of main string. 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At null at end of normalization buffer. Need to check whether there there are 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // any characters left in the main buffer. 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s->origFlags & UCOL_USE_ITERATOR) { 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !(s->iterator->hasNext(s->iterator)); 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if ((s->origFlags & UCOL_ITER_HASLEN) == 0) { 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Null terminated main string. fcdPosition is the 'return' position into main buf. 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (*s->fcdPosition == 0); 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Main string with an end pointer. 
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return s->fcdPosition == s->endp; 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collIter_bos() 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Checks for a collIterate being positioned at the start of 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* its source string. 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_bos(collIterate *source) { 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we're going backwards, we need to know whether there is more in the 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // iterator, even if we are in the side buffer 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) { 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !source->iterator->hasPrevious(source->iterator); 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (source->pos <= source->string || 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((source->flags & UCOL_ITER_INNORMBUF) && 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(source->pos - 1) == 0 && source->fcdPosition == NULL)) { 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_SimpleBos(collIterate *source) { 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we're going backwards, we need to know whether there is more in the 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // iterator, even if we are in the side buffer 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) { 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !source->iterator->hasPrevious(source->iterator); 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (source->pos == source->string) { 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/ 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //return (data->pos == data->string) || 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the open/close functions */ 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru/****************************************************************************/ 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UCollator* 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_initFromBinary(const uint8_t *bin, int32_t length, 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *base, 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *fillIn, 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *result = fillIn; 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(base == NULL) { 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we don't support null base yet 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We need these and we could be running without UCA 424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_initImplicitConstants(status); 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCATableHeader *colData = (UCATableHeader *)bin; 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do we want version check here? 
We're trying to figure out whether collators are compatible 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 || 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0)) || 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru colData->version[0] != UCOL_BUILDER_VERSION) 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_COLLATOR_VERSION_MISMATCH; 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) { 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ucol_initCollator((const UCATableHeader *)bin, result, base, status); 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)){ 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hasRealData = TRUE; 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(base) { 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ucol_initCollator(base->image, result, base, status); 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+((const UCATableHeader *)bin)->options), status); 
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)){ 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hasRealData = FALSE; 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_USELESS_COLLATOR_ERROR; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeImageOnClose = FALSE; 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->actualLocale = NULL; 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->validLocale = NULL; 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->requestedLocale = NULL; 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->rules = NULL; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->rulesLength = 0; 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeRulesOnClose = FALSE; 464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->ucaRules = NULL; 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollator* U_EXPORT2 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_openBinary(const uint8_t *bin, int32_t length, 
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *base, 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ucol_initFromBinary(bin, length, base, NULL, status); 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruucol_cloneBinary(const UCollator *coll, 478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t *buffer, int32_t capacity, 479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) 480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t length = 0; 482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return length; 484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(capacity < 0) { 486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return length; 488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->hasRealData == TRUE) { 490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru length = coll->image->size; 491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(length <= capacity) { 492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(buffer, coll->image, length); 493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
else { 494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet))); 498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(length <= capacity) { 499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* build the UCATableHeader with minimal entries */ 500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* do not copy the header from the UCA file because its values are wrong! */ 501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */ 502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* reset everything */ 504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memset(buffer, 0, length); 505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* set the tailoring-specific values */ 507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCATableHeader *myData = (UCATableHeader *)buffer; 508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->size = length; 509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* offset for the options, the only part of the data that is present after the header */ 511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->options = sizeof(UCATableHeader); 512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* need to always set the expansion value for an upper bound of the options */ 
514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->expansion = myData->options + sizeof(UColOptionSet); 515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->magic = UCOL_HEADER_MAGIC; 517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->isBigEndian = U_IS_BIG_ENDIAN; 518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->charSetFamily = U_CHARSET_FAMILY; 519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* copy UCA's version; genrb will override all but the builder version with tailoring data */ 521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo)); 522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo)); 524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo)); 525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo)); 526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->jamoSpecial = coll->image->jamoSpecial; 527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* copy the collator options */ 529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet)); 530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru } 533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return length; 535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollator* U_EXPORT2 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t * pBufferSize, UErrorCode *status) 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator * localCollator; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator); 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *stackBufferChars = (char *)stackBuffer; 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t imageSize = 0; 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rulesSize = 0; 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rulesPadding = 0; 546b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t defaultReorderCodesSize = 0; 547b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t reorderCodesSize = 0; 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *image; 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *rules; 550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t* defaultReorderCodes; 551b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t* reorderCodes; 552b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t* leadBytePermutationTable; 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool colAllocated = FALSE; 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool imageAllocated = FALSE; 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status == NULL || U_FAILURE(*status)){ 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((stackBuffer && !pBufferSize) || !coll){ 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 563b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coll->rules && coll->freeRulesOnClose) { 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar); 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar)); 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferSizeNeeded += rulesSize + rulesPadding; 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 569b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // no padding for alignment needed from here since the next two are 4 byte quantities 570b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->defaultReorderCodes) { 571b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho defaultReorderCodesSize = coll->defaultReorderCodesLength * sizeof(int32_t); 572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bufferSizeNeeded += defaultReorderCodesSize; 573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 574b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->reorderCodes) { 575b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho reorderCodesSize = coll->reorderCodesLength * sizeof(int32_t); 576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bufferSizeNeeded += reorderCodesSize; 
577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 578b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->leadBytePermutationTable) { 579b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bufferSizeNeeded += 256 * sizeof(uint8_t); 580b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 581b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 582b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (stackBuffer && *pBufferSize <= 0) { /* 'preflighting' request - set needed size into *pBufferSize */ 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pBufferSize = bufferSizeNeeded; 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Pointers on 64-bit platforms need to be aligned 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * on a 64-bit boundry in memory. 
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pBufferSize > offsetUp) { 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pBufferSize -= offsetUp; 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stackBufferChars += offsetUp; 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pBufferSize = 1; 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stackBuffer = (void *)stackBufferChars; 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stackBuffer == NULL || *pBufferSize < bufferSizeNeeded) { 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate one here...*/ 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded); 606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Null pointer check. 
607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (stackBufferChars == NULL) { 608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru colAllocated = TRUE; 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(*status)) { 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_SAFECLONE_ALLOCATED_WARNING; 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator = (UCollator *)stackBufferChars; 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding); 618b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho defaultReorderCodes = (int32_t*)((uint8_t*)rules + rulesSize); 619b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho reorderCodes = (int32_t*)((uint8_t*)defaultReorderCodes + defaultReorderCodesSize); 620b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho leadBytePermutationTable = (uint8_t*)reorderCodes + reorderCodesSize; 621b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode tempStatus = U_ZERO_ERROR; 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus); 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coll->freeImageOnClose) { 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru image = (uint8_t *)uprv_malloc(imageSize); 
628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Null pointer check 629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (image == NULL) { 630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_cloneBinary(coll, image, imageSize, status); 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru imageAllocated = TRUE; 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru image = (uint8_t *)coll->image; 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollator, status); 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coll->rules) { 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coll->freeRulesOnClose) { 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->rules = u_strcpy(rules, coll->rules); 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //bufferEnd += rulesSize; 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->rules = coll->rules; 
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->freeRulesOnClose = FALSE; 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->rulesLength = coll->rulesLength; 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 655b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 656b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // collator reordering 657b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->defaultReorderCodes) { 658b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho localCollator->defaultReorderCodes = 659b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (int32_t*) uprv_memcpy(defaultReorderCodes, coll->defaultReorderCodes, coll->defaultReorderCodesLength * sizeof(int32_t)); 660b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho localCollator->defaultReorderCodesLength = coll->defaultReorderCodesLength; 661b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho localCollator->freeDefaultReorderCodesOnClose = FALSE; 662b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 663b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->reorderCodes) { 664b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho localCollator->reorderCodes = 665b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (int32_t*)uprv_memcpy(reorderCodes, coll->reorderCodes, coll->reorderCodesLength * sizeof(int32_t)); 666b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho localCollator->reorderCodesLength = coll->reorderCodesLength; 667b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho localCollator->freeReorderCodesOnClose = FALSE; 668b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 669b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->leadBytePermutationTable) { 670b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho localCollator->leadBytePermutationTable = 671b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (uint8_t*) uprv_memcpy(leadBytePermutationTable, 
coll->leadBytePermutationTable, 256); 672b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho localCollator->freeLeadBytePermutationTableOnClose = FALSE; 673b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(coll, (UColAttribute)i, status), status); 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // zero copies of pointers 680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru localCollator->actualLocale = NULL; 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->validLocale = NULL; 682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru localCollator->requestedLocale = NULL; 683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru localCollator->ucaRules = coll->ucaRules; // There should only be one copy here. 
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->freeOnClose = colAllocated; 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->freeImageOnClose = imageAllocated; 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return localCollator; 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_close(UCollator *coll) 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE); 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll); 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll != NULL) { 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // these are always owned by each UCollator struct, 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so we always free them 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->validLocale != NULL) { 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->validLocale); 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->actualLocale != NULL) { 701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(coll->actualLocale); 702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->requestedLocale != NULL) { 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->requestedLocale); 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
if(coll->latinOneCEs != NULL) { 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->latinOneCEs); 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->options != NULL && coll->freeOptionsOnClose) { 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->options); 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->rules != NULL && coll->freeRulesOnClose) { 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free((UChar *)coll->rules); 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->image != NULL && coll->freeImageOnClose) { 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free((UCATableHeader *)coll->image); 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 718b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) { 72027f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(coll->leadBytePermutationTable); 72127f654740f2a26ad62a5c155af9199af9e69b889claireho } 722b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(coll->defaultReorderCodes != NULL && coll->freeDefaultReorderCodesOnClose == TRUE) { 723b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(coll->defaultReorderCodes); 724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) { 72627f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(coll->reorderCodes); 72727f654740f2a26ad62a5c155af9199af9e69b889claireho } 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Here, it would be advisable to close: */ 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* - UData for UCA (unless we stuff it in the root resb */ 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Again, do we need additional housekeeping... HMMM! */ 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose); 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->freeOnClose){ 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* for safeClone, if freeOnClose is FALSE, 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru don't free the other instance data */ 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll); 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT(); 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This one is currently used by genrb & tests. After constructing from rules (tailoring),*/ 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* you should be able to get the binary chunk to write out... 
Doesn't look very full now */ 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint8_t* U_EXPORT2 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status) 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t *result = NULL; 748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->hasRealData == TRUE) { 752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *length = coll->image->size; 753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (uint8_t *)uprv_malloc(*length); 754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* test for NULL */ 755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (result == NULL) { 756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(result, coll->image, *length); 760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet))); 762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (uint8_t *)uprv_malloc(*length); 763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* test for NULL */ 764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (result == NULL) { 
765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* build the UCATableHeader with minimal entries */ 770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* do not copy the header from the UCA file because its values are wrong! */ 771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */ 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* reset everything */ 774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memset(result, 0, *length); 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* set the tailoring-specific values */ 777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCATableHeader *myData = (UCATableHeader *)result; 778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->size = *length; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* offset for the options, the only part of the data that is present after the header */ 781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->options = sizeof(UCATableHeader); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* need to always set the expansion value for an upper bound of the options */ 784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->expansion = myData->options + sizeof(UColOptionSet); 
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->magic = UCOL_HEADER_MAGIC; 787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->isBigEndian = U_IS_BIG_ENDIAN; 788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->charSetFamily = U_CHARSET_FAMILY; 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* copy UCA's version; genrb will override all but the builder version with tailoring data */ 791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo)); 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo)); 794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo)); 795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo)); 796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->jamoSpecial = coll->image->jamoSpecial; 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* copy the collator options */ 799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(result+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet)); 800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 
ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) { 805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->caseFirst = (UColAttributeValue)opts->caseFirst; 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->caseLevel = (UColAttributeValue)opts->caseLevel; 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->frenchCollation = (UColAttributeValue)opts->frenchCollation; 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->normalizationMode = (UColAttributeValue)opts->normalizationMode; 81227f654740f2a26ad62a5c155af9199af9e69b889claireho if(result->normalizationMode == UCOL_ON && !initializeFCD(status)) { 81327f654740f2a26ad62a5c155af9199af9e69b889claireho return; 81427f654740f2a26ad62a5c155af9199af9e69b889claireho } 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->strength = (UColAttributeValue)opts->strength; 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->variableTopValue = opts->variableTopValue; 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->alternateHandling = (UColAttributeValue)opts->alternateHandling; 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ; 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->numericCollation = (UColAttributeValue)opts->numericCollation; 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->caseFirstisDefault = TRUE; 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->caseLevelisDefault = TRUE; 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->frenchCollationisDefault = TRUE; 
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->normalizationModeisDefault = TRUE; 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->strengthisDefault = TRUE; 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->variableTopValueisDefault = TRUE; 82627f654740f2a26ad62a5c155af9199af9e69b889claireho result->alternateHandlingisDefault = TRUE; 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hiraganaQisDefault = TRUE; 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->numericCollationisDefault = TRUE; 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_updateInternalState(result, status); 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->options = opts; 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Approximate determination if a character is at a contraction end. 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Guaranteed to be TRUE if a character is at the end of a contraction, 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* otherwise it is not deterministic. 
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param c character to be determined 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll collator 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) { 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < coll->minContrEndCP) { 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t hash = c; 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t htbyte; 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { 852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_TRAIL(c)) { 853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru htbyte = coll->contrEndCP[hash>>3]; 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (((htbyte >> (hash & 7)) & 1) == 1); 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru* i_getCombiningClass() 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* A fast, at least partly inline version of u_getCombiningClass() 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This is a candidate for further optimization. Used heavily 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* in contraction processing. 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) { 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t sCC = 0; 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) { 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sCC = u_getCombiningClass(c); 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return sCC; 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status) { 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *result = fillIn; 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status) || image == NULL) { 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result == NULL) { 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru result = (UCollator *)uprv_malloc(sizeof(UCollator)); 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result == NULL) { 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeOnClose = TRUE; 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeOnClose = FALSE; 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->image = image; 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->mapping.getFoldingOffset = _getFoldingOffset; 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosition; 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status); 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result->freeOnClose == TRUE) { 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(result); 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = NULL; 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping); 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->contractionCEs); 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex); 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion); 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->rules = NULL; 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->rulesLength = 0; 914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->freeRulesOnClose = FALSE; 915b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result->defaultReorderCodes = NULL; 916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result->defaultReorderCodesLength = 0; 917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result->freeDefaultReorderCodesOnClose = FALSE; 91827f654740f2a26ad62a5c155af9199af9e69b889claireho result->reorderCodes = NULL; 91927f654740f2a26ad62a5c155af9199af9e69b889claireho result->reorderCodesLength = 0; 920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result->freeReorderCodesOnClose = FALSE; 92127f654740f2a26ad62a5c155af9199af9e69b889claireho result->leadBytePermutationTable = NULL; 922b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result->freeLeadBytePermutationTableOnClose = FALSE; 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get the version info from UCATableHeader and populate the Collator struct*/ 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/ 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->dataVersion[1] = result->image->version[1]; /* UCA 
Tailoring rules version*/ 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->dataVersion[2] = 0; 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->dataVersion[3] = 0; 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP; 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->minUnsafeCP = 0; 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (c=0; c<0x300; c++) { // Find the smallest unsafe char. 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ucol_unsafeCP(c, result)) break; 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->minUnsafeCP = c; 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP; 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->minContrEndCP = 0; 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (c=0; c<0x300; c++) { // Find the Contraction-ending char. 
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ucol_contractionEndCP(c, result)) break; 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->minContrEndCP = c; 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* max expansion tables */ 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->endExpansionCE = (uint32_t*)((uint8_t*)result->image + 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->image->endExpansionCE); 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->lastEndExpansionCE = result->endExpansionCE + 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->image->endExpansionCECount - 1; 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->expansionCESize = (uint8_t*)result->image + 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->image->expansionCESize; 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //result->errorCode = *status; 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->latinOneCEs = NULL; 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->latinOneRegenTable = FALSE; 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->latinOneFailed = FALSE; 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->UCA = UCA; 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Normally these will be set correctly later. 
This is the default if you use UCA or the default. */ 962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->ucaRules = NULL; 963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->actualLocale = NULL; 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->validLocale = NULL; 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->requestedLocale = NULL; 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hasRealData = FALSE; // real data lives in .dat file... 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeImageOnClose = FALSE; 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96927f654740f2a26ad62a5c155af9199af9e69b889claireho /* set attributes */ 97027f654740f2a26ad62a5c155af9199af9e69b889claireho ucol_setOptionsFromHeader( 97127f654740f2a26ad62a5c155af9199af9e69b889claireho result, 97227f654740f2a26ad62a5c155af9199af9e69b889claireho (UColOptionSet*)((uint8_t*)result->image+result->image->options), 97327f654740f2a26ad62a5c155af9199af9e69b889claireho status); 97427f654740f2a26ad62a5c155af9199af9e69b889claireho result->freeOptionsOnClose = FALSE; 97527f654740f2a26ad62a5c155af9199af9e69b889claireho 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* new Mark's code */ 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For generation of Implicit CEs 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @author Davis 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Cleaned up so that changes can be made 
more easily.
 * Old values:
# First Implicit: E26A792D
# Last Implicit: E3DC70C0
# First CJK: E0030300
# Last CJK: E0A9DD00
# First CJK_A: E0A9DF00
# Last CJK_A: E0DE3100
 */
/* Following is a port of Mark's code for new treatment of implicits.
 * It is positioned here, since ucol_initUCA needs to initialize the
 * variables below according to the data in the fractional UCA.
 */

/**
 * Function used to:
 * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
 * b) bump any non-CJK characters by 0x110000 (NON_CJK_OFFSET).
 * The relevant blocks are:
 * A:    4E00..9FFF; CJK Unified Ideographs
 *       F900..FAFF; CJK Compatibility Ideographs
 * B:    3400..4DBF; CJK Unified Ideographs Extension A
 *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
 * As long as
 *   no new B characters are allocated between 4E00 and FAFF, and
 *   no new A characters are outside of this range,
 * (very high probability) this simple code will work.
 * The reordered blocks are:
 * Block1 is CJK
 * Block2 is CJK_COMPAT_USED
 * Block3 is CJK_A
 * (all contiguous)
 * Any other CJK gets its normal code point
 * Any non-CJK gets +0x110000 (NON_CJK_OFFSET)
 * When we reorder Block1, we make sure that it is at the very start,
 * so that it will use a 3-byte form.
1021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Warning: the we only pick up the compatibility characters that are 1022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * NOT decomposed, so that block is smaller! 1023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// CONSTANTS 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NON_CJK_OFFSET = 0x110000, 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1031b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Precomputed by initImplicitConstants() 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final3Multiplier = 0, 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final4Multiplier = 0, 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final3Count = 0, 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final4Count = 0, 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru medialCount = 0, 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min3Primary = 0, 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min4Primary = 0, 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max4Primary = 0, 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru minTrail = 0, 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxTrail = 0, 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
max3Trail = 0, 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max4Trail = 0, 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min4Boundary = 0; 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32 104927f654740f2a26ad62a5c155af9199af9e69b889claireho // 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; 105027f654740f2a26ad62a5c155af9199af9e69b889claireho // 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_BASE = 0x4E00, 105227f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_LIMIT = 0x9FCB+1, 105327f654740f2a26ad62a5c155af9199af9e69b889claireho // Unified CJK ideographs in the compatibility ideographs block. 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_COMPAT_USED_BASE = 0xFA0E, 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_COMPAT_USED_LIMIT = 0xFA2F+1, 105627f654740f2a26ad62a5c155af9199af9e69b889claireho // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;; 105727f654740f2a26ad62a5c155af9199af9e69b889claireho // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;; 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_A_BASE = 0x3400, 105927f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_A_LIMIT = 0x4DB5+1, 106027f654740f2a26ad62a5c155af9199af9e69b889claireho // 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;; 106127f654740f2a26ad62a5c155af9199af9e69b889claireho // 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;; 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_B_BASE = 0x20000, 106327f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_B_LIMIT = 0x2A6D6+1, 106427f654740f2a26ad62a5c155af9199af9e69b889claireho // 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;; 106527f654740f2a26ad62a5c155af9199af9e69b889claireho // 2B734;<CJK Ideograph Extension C, 
Last>;Lo;0;L;;;;;N;;;;; 106627f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_C_BASE = 0x2A700, 106727f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_C_LIMIT = 0x2B734+1, 106827f654740f2a26ad62a5c155af9199af9e69b889claireho // 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;; 106927f654740f2a26ad62a5c155af9199af9e69b889claireho // 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;; 107027f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_D_BASE = 0x2B740, 107127f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_D_LIMIT = 0x2B81D+1; 107227f654740f2a26ad62a5c155af9199af9e69b889claireho // when adding to this list, look for all occurrences (in project) 107327f654740f2a26ad62a5c155af9199af9e69b889claireho // of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing!!!! 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 swapCJK(UChar32 i) { 107627f654740f2a26ad62a5c155af9199af9e69b889claireho if (i < CJK_A_BASE) { 107727f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 107827f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_A_LIMIT) { 107927f654740f2a26ad62a5c155af9199af9e69b889claireho // Extension A has lower code points than the original Unihan+compat 108027f654740f2a26ad62a5c155af9199af9e69b889claireho // but sorts higher. 
108127f654740f2a26ad62a5c155af9199af9e69b889claireho return i - CJK_A_BASE 108227f654740f2a26ad62a5c155af9199af9e69b889claireho + (CJK_LIMIT - CJK_BASE) 108327f654740f2a26ad62a5c155af9199af9e69b889claireho + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE); 108427f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_BASE) { 108527f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 108627f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_LIMIT) { 108727f654740f2a26ad62a5c155af9199af9e69b889claireho return i - CJK_BASE; 108827f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_COMPAT_USED_BASE) { 108927f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 109027f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_COMPAT_USED_LIMIT) { 109127f654740f2a26ad62a5c155af9199af9e69b889claireho return i - CJK_COMPAT_USED_BASE 109227f654740f2a26ad62a5c155af9199af9e69b889claireho + (CJK_LIMIT - CJK_BASE); 109327f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_B_BASE) { 109427f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 109527f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_B_LIMIT) { 109627f654740f2a26ad62a5c155af9199af9e69b889claireho return i; // non-BMP-CJK 109727f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_C_BASE) { 109827f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 109927f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_C_LIMIT) { 110027f654740f2a26ad62a5c155af9199af9e69b889claireho return i; // non-BMP-CJK 110127f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_D_BASE) { 110227f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 110327f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_D_LIMIT) { 110427f654740f2a26ad62a5c155af9199af9e69b889claireho return i; // non-BMP-CJK 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return i + NON_CJK_OFFSET; // non-CJK 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getRawFromCodePoint(UChar32 i) { 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return swapCJK(i)+1; 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getCodePointFromRaw(UChar32 i) { 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i--; 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 result = 0; 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i >= NON_CJK_OFFSET) { 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i - NON_CJK_OFFSET; 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(i >= CJK_B_BASE) { 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i; 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i < CJK_LIMIT - CJK_BASE) { 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i + CJK_BASE; 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE); 
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE); 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = -1; 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// GET IMPLICIT PRIMARY WEIGHTS 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Return value is left justified primary key 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getImplicitFromRaw(UChar32 cp) { 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0 || cp > UCOL_MAX_INPUT) { 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp)); 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last0 = cp - min4Boundary; 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (last0 < 0) { 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last1 = cp / final3Count; 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last0 = cp % final3Count; 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last2 = last1 / medialCount; 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last1 %= medialCount; 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last1 = minTrail + last1; // offset 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last2 = min3Primary + last2; // offset 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (last2 >= min4Primary) { 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2)); 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (last2 << 24) + (last1 << 16) + (last0 << 8); 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last1 = last0 / final4Count; 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last0 %= final4Count; 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last2 = last1 / medialCount; 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last1 %= medialCount; 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last3 = last2 / medialCount; 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last2 %= medialCount; 
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last1 = minTrail + last1; // offset 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last2 = minTrail + last2; // offset 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last3 = min4Primary + last3; // offset 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (last3 > max4Primary) { 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3)); 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0; 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t U_EXPORT2 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getImplicitPrimary(UChar32 cp) { 118727f654740f2a26ad62a5c155af9199af9e69b889claireho //fprintf(stdout, "Incoming: %04x\n", cp); 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp)); 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp = swapCJK(cp); 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp++; 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we 
now have a range of numbers from 0 to 21FFFF. 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp)); 119527f654740f2a26ad62a5c155af9199af9e69b889claireho //fprintf(stdout, "CJK swapped: %04x\n", cp); 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return uprv_uca_getImplicitFromRaw(cp); 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Converts implicit CE into raw integer ("code point") 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param implicit 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return -1 if illegal format 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getRawFromImplicit(uint32_t implicit) { 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 result; 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b3 = implicit & 0xFF; 1209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 b2 = (implicit >> 8) & 0xFF; 1210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 b1 = (implicit >> 16) & 0xFF; 1211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 b0 = (implicit >> 24) & 0xFF; 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // simple parameter checks 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b0 < min3Primary || b0 > 
max4Primary 1215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || b1 < minTrail || b1 > maxTrail) 1216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // normal offsets 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b1 -= minTrail; 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // take care of the final values, and compose 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b0 < min4Primary) { 1222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (b2 < minTrail || b2 > max3Trail || b3 != 0) 1223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b2 -= minTrail; 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 remainder = b2 % final3Multiplier; 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (remainder != 0) 1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b0 -= min3Primary; 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b2 /= final3Multiplier; 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ((b0 * medialCount) + b1) * final3Count + b2; 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (b2 < minTrail || b2 > maxTrail 1233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || b3 < minTrail || b3 > max4Trail) 1234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b2 -= minTrail; 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b3 -= minTrail; 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru UChar32 remainder = b3 % final4Multiplier; 1238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (remainder != 0) 1239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b3 /= final4Multiplier; 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b0 -= min4Primary; 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary; 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // final check 1245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (result < 0 || result > UCOL_MAX_INPUT) 1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t divideAndRoundUp(int a, int b) { 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1 + (a-1)/b; 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this function is either called from initUCA or from genUCA before 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * doing canonical closure for the UCA. 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set up to generate implicits. 
1261b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Maintenance Note: this function may end up being called more than once, due 1262b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * to threading races during initialization. Make sure that 1263b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * none of the Constants is ever transiently assigned an 1264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * incorrect value. 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param minPrimary 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param maxPrimary 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param minTrail final byte 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param maxTrail final byte 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param gap3 the gap we leave for tailoring for 3-byte forms 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param gap4 the gap we leave for tailoring for 4-byte forms 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void initImplicitConstants(int minPrimary, int maxPrimary, 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int minTrailIn, int maxTrailIn, 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int gap3, int primaries3count, 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // some simple parameter checks 1277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) 1278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF) 1279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (primaries3count < 1)) 
1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru minTrail = minTrailIn; 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxTrail = maxTrailIn; 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min3Primary = minPrimary; 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max4Primary = maxPrimary; 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // compute constants for use later. 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // number of values we can use in trailing bytes 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // leave room for empty values between AND above, e.g. 
if gap = 2 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // range 3..7 => +3 -4 -5 -6 -7: so 1 value 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final3Multiplier = gap3 + 1; 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final3Count = (maxTrail - minTrail + 1) / final3Multiplier; 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max3Trail = minTrail + (final3Count - 1) * final3Multiplier; 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // medials can use full range 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru medialCount = (maxTrail - minTrail + 1); 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find out how many values fit in each form 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t threeByteCount = medialCount * final3Count; 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // now determine where the 3/4 boundary is. 
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we use 3 bytes below the boundary, and 4 above 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t primariesAvailable = maxPrimary - minPrimary + 1; 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t primaries4count = primariesAvailable - primaries3count; 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t min3ByteCoverage = primaries3count * threeByteCount; 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min4Primary = minPrimary + primaries3count; 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min4Boundary = min3ByteCoverage; 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now expand out the multiplier for the 4 bytes, and redo. 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary; 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count); 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount); 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte; 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (gap4 < 1) { 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final4Multiplier = gap4 + 1; 
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final4Count = neededPerFinalByte; 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max4Trail = minTrail + (final4Count - 1) * final4Multiplier; 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Supply parameters for generating implicit CEs 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 1332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruuprv_uca_initImplicitConstants(UErrorCode *status) { 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms. 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status); 1335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status); 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* collIterNormalize Incremental Normalization happens here. */ 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* pick up the range of chars identifed by FCD, */ 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* normalize it into the collIterate's writable buffer, */ 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* switch the collIterate's state to use the writable buffer. 
*/ 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid collIterNormalize(collIterate *collationSource) 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *srcP = collationSource->pos - 1; /* Start of chars to normalize */ 134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *endP = collationSource->fcdPosition; /* End of region to normalize+1 */ 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - srcP)), 135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collationSource->writableBuffer, 135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status); 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_errorName(status)); 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collationSource->pos = collationSource->writableBuffer.getTerminatedBuffer(); 1362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->origFlags = collationSource->flags; 1363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->flags |= UCOL_ITER_INNORMBUF; 
1364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR); 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This function takes the iterator and extracts normalized stuff up to the next boundary 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// It is similar in the end results to the collIterNormalize, but for the cases when we 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// use an iterator 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizeIterator(collIterate *collationSource) { 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasNormalized = FALSE; 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->iterator, UITER_CURRENT); 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t iterIndex = collationSource->iterator->getState(collationSource->iterator); 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer, 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status); 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(status == U_BUFFER_OVERFLOW_ERROR || normLen == (int32_t)collationSource->writableBufSize) { 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reallocate and terminate 
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!u_growBufferFromStatic(collationSource->stackWritableBuffer, 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &collationSource->writableBuffer, 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int32_t *)&collationSource->writableBufSize, normLen + 1, 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0) 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #ifdef UCOL_DEBUG 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "normalizeIterator(), out of memory\n"); 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #endif 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //collationSource->iterator->move(collationSource->iterator, iterIndex, UITER_ZERO); 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->iterator->setState(collationSource->iterator, iterIndex, &status); 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer, 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status); 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Terminate the buffer - we already checked that it is big enough 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->writableBuffer[normLen] = 0; 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(collationSource->writableBuffer != 
collationSource->stackWritableBuffer) { 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->flags |= UCOL_ITER_ALLOCATED; 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->pos = collationSource->writableBuffer; 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->origFlags = collationSource->flags; 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->flags |= UCOL_ITER_INNORMBUF; 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR); 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/ 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Incremental FCD check and normalize */ 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Called from getNextCE when normalization state is suspect. */ 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* When entering, the state is known to be this: */ 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* o We are working in the main buffer of the collIterate, not the side */ 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* writable buffer. When in the side buffer, normalization mode is always off, */ 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* so we won't get here. */ 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* o The leading combining class from the current character is 0 or */ 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* the trailing combining class of the previous char was zero. 
*/ 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* True because the previous call to this function will have always exited */ 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* that way, and we get called for every char where cc might be non-zero. */ 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIterFCD(collIterate *collationSource) { 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcP, *endP; 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadingCC; 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t prevTrailingCC = 0; 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t fcd; 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool needNormalize = FALSE; 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcP = collationSource->pos-1; 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collationSource->flags & UCOL_ITER_HASLEN) { 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru endP = collationSource->endp; 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru endP = NULL; 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get the trailing combining class of the current character. If it's zero, 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we are OK. 
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trie access */ 1438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP); 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fcd != 0) { 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevTrailingCC != 0) { 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The current char has a non-zero trailing CC. Scan forward until we find 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a char with a leading cc of zero. 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (endP == NULL || srcP != endP) 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *savedSrcP = srcP; 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trie access */ 1450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP); 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (leadingCC == 0) { 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcP = savedSrcP; // Hit char that is not part of combining sequence. 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // back up over it. (Could be surrogate pair!) 
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (leadingCC < prevTrailingCC) { 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru needNormalize = TRUE; 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->fcdPosition = (UChar *)srcP; 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return needNormalize; 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the CE retrieval functions */ 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t getImplicit(UChar32 cp, collIterate *collationSource); 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Querustatic uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource); 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there should be a macro version of this function in the header file */ 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the first function that tries to fetch a collation element */ 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* If it's not succesfull or it encounters a more difficult situation */ 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* some more sofisticated and slower functions are invoked */ 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) { 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t order = 0; 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collationSource->CEpos > collationSource->toReturn) { /* Are there any CEs from previous expansions? */ 1488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = *(collationSource->toReturn++); /* if so, return them */ 1489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(collationSource->CEpos == collationSource->toReturn) { 1490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->CEpos = collationSource->toReturn = collationSource->extendCEs ? 
collationSource->extendCEs : collationSource->CEs; 1491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return order; 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar ch = 0; 1496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->offsetReturn = NULL; 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 1499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (;;) /* Loop handles case when incremental normalize switches */ 1500b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho { /* to or from the side buffer / original string, and we */ 1501b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* need to start again to get the next character. */ 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1503b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) 1504b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho { 1505b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // The source string is null terminated and we're not working from the side buffer, 1506b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // and we're not normalizing. This is the fast path. 1507b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.) 
1508b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ch = *collationSource->pos++; 1509b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (ch != 0) { 1510b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 1511b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1512b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho else { 1513b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return UCOL_NO_MORE_CES; 1514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1517b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (collationSource->flags & UCOL_ITER_HASLEN) { 1518b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Normal path for strings when length is specified. 1519b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (We can't be in side buffer because it is always null terminated.) 1520b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (collationSource->pos >= collationSource->endp) { 1521b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Ran off of the end of the main source string. We're done. 
1522b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return UCOL_NO_MORE_CES; 1523b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1524b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ch = *collationSource->pos++; 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1526b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho else if(collationSource->flags & UCOL_USE_ITERATOR) { 1527b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar32 iterCh = collationSource->iterator->next(collationSource->iterator); 1528b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(iterCh == U_SENTINEL) { 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_NO_MORE_CES; 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1531b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ch = (UChar)iterCh; 1532b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1533b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho else 1534b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho { 1535b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Null terminated string. 1536b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ch = *collationSource->pos++; 1537b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (ch == 0) { 1538b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Ran off end of buffer. 1539b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { 1540b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Ran off end of main string. backing up one character. 1541b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho collationSource->pos--; 1542b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return UCOL_NO_MORE_CES; 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1544b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho else 1545b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho { 1546b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Hit null in the normalize side buffer. 
1547b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Usually this means the end of the normalized data, 1548b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // except for one odd case: a null followed by combining chars, 1549b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // which is the case if we are at the start of the buffer. 1550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) { 1551b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 1552b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1554b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Null marked end of side buffer. 1555b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Revert to the main string and 1556b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // loop back to top to try again to get a character. 1557b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho collationSource->pos = collationSource->fcdPosition; 1558b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho collationSource->flags = collationSource->origFlags; 1559b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho continue; 1560b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1564b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(collationSource->flags&UCOL_HIRAGANA_Q) { 1565b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag 1566b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * based on whether the previous codepoint was Hiragana or Katakana. 
1567b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 1568b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) || 1569b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) { 1570b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho collationSource->flags |= UCOL_WAS_HIRAGANA; 1571b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho collationSource->flags &= ~UCOL_WAS_HIRAGANA; 1573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We've got a character. See if there's any fcd and/or normalization stuff to do. 1577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer. 1578b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((collationSource->flags & UCOL_ITER_NORM) == 0) { 1579b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 1580b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1582b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (collationSource->fcdPosition >= collationSource->pos) { 1583b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // An earlier FCD check has already covered the current character. 1584b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We can go ahead and process this char. 1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1588b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (ch < ZERO_CC_LIMIT_ ) { 1589b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Fast fcd safe path. 
Trailing combining class == 0. This char is OK. 1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1593b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { 1594b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We need to peek at the next character in order to tell if we are FCD 1595b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) { 1596b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We are at the last char of source string. 1597b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // It is always OK for FCD check. 1598b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 1599b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1600b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1601b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test 1602b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { 1603b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 1604b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1605b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1608b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Need a more complete FCD check and possible normalization. 
1609b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (collIterFCD(collationSource)) { 1610b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho collIterNormalize(collationSource); 1611b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1612b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { 1613b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // No normalization was needed. Go ahead and process the char we already had. 1614b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 1615b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1616b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1617b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Some normalization happened. Next loop iteration will pick up a char 1618b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // from the normalization buffer. 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1620b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } // end for (;;) 1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1623b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (ch <= 0xFF) { 1624b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* For latin-1 characters we never need to fall back to the UCA table */ 1625b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* because all of the UCA data is replicated in the latinOneMapping array */ 1626b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = coll->latinOneMapping[ch]; 1627b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (order > UCOL_NOT_FOUND) { 1628b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); 1629b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1631b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho else 1632b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho { 
1633b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Always use UCA for Han, Hangul 1634b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (Han extension A is before main Han block) 1635b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // **** Han compatibility chars ?? **** 1636b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && 1637b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { 1638b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { 1639b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // between the two target ranges; do normal lookup 1640b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // **** this range is YI, Modifier tone letters, **** 1641b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // **** Latin-D, Syloti Nagari, Phagas-pa. **** 1642b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // **** Latin-D might be tailored, so we need to **** 1643b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // **** do the normal lookup for these guys. 
**** 1644b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 1645b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1646b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // in one of the target ranges; use UCA 1647b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = UCOL_NOT_FOUND; 1648b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1649b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1650b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 1651b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1653b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(order > UCOL_NOT_FOUND) { /* if a CE is special */ 1654b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */ 1655b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1656b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1657b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */ 1658b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */ 1659b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); 1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1661b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */ 1662b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status); 1663b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
1666b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL ); 1667b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(order == UCOL_NOT_FOUND) { 1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru order = getImplicit(ch, collationSource); 1670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return order; /* return the CE */ 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_getNextCE, out-of-line version for use from other files. */ 1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) { 1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ucol_IGetNextCE(coll, collationSource, status); 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Incremental previous normalization happens here. Pick up the range of chars 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* identifed by FCD, normalize it into the collIterate's writable buffer, 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* switch the collIterate's state to use the writable buffer. 
1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data 1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid collPrevIterNormalize(collIterate *data) 1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 169150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pEnd = data->pos; /* End normalize + 1 */ 169250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pStart; 1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Start normalize */ 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->fcdPosition == NULL) { 1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStart = data->string; 1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStart = data->fcdPosition + 1; 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t normLen = 170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pStart) + 1)), 170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer, 170550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status). 
170650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length(); 170750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(status)) { 170850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this puts the null termination infront of the normalized string instead 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru of the end 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 171450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer.insert(0, (UChar)0); 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 1717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The usual case at this point is that we've got a base 1718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * character followed by marks that were normalized. If 1719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * fcdPosition is NULL, that means that we backed up to 1720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * the beginning of the string and there's no base character. 1721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Forward processing will usually normalize when it sees 1723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * the first mark, so that mark will get it's natural offset 1724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * and the rest will get the offset of the character following 1725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * the marks. The base character will also get its natural offset. 
1726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * We write the offset of the base character, if there is one, 1728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * followed by the offset of the first mark and then the offsets 1729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * of the rest of the marks. 1730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t firstMarkOffset = 0; 173250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t trailOffset = (int32_t)(data->pos - data->string + 1); 1733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t trailCount = normLen - 1; 1734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->fcdPosition != NULL) { 173650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t baseOffset = (int32_t)(data->fcdPosition - data->string); 1737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar baseChar = *data->fcdPosition; 1738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru firstMarkOffset = baseOffset + 1; 1740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 174250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the base character is the start of a contraction, forward processing 174350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * will normalize the marks while checking for the contraction, which means 174450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * that the offset of the first mark will the same as the other marks. 
174550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 174650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * **** THIS IS PROBABLY NOT A COMPLETE TEST **** 174750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 174850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (baseChar >= 0x100) { 174950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, baseChar); 175050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 175150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) { 175250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, baseChar); 175350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 175450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 175550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION_TAG) { 175650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstMarkOffset = trailOffset; 175750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 175850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 176027f654740f2a26ad62a5c155af9199af9e69b889claireho data->appendOffset(baseOffset, status); 1761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 176327f654740f2a26ad62a5c155af9199af9e69b889claireho data->appendOffset(firstMarkOffset, status); 1764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (int32_t i = 0; i < trailCount; i += 1) { 176627f654740f2a26ad62a5c155af9199af9e69b889claireho data->appendOffset(trailOffset, status); 1767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
data->offsetRepeatValue = trailOffset; 1770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->offsetReturn = data->offsetStore - 1; 1772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->offsetReturn == data->offsetBuffer) { 1773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->offsetStore = data->offsetBuffer; 1774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 177650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.getTerminatedBuffer() + 1 + normLen; 1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->origFlags = data->flags; 1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags |= UCOL_ITER_INNORMBUF; 1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Incremental FCD check for previous iteration and normalize. Called from 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* getPrevCE when normalization state is suspect. 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* When entering, the state is known to be this: 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* o We are working in the main buffer of the collIterate, not the side 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* writable buffer. When in the side buffer, normalization mode is always 1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* off, so we won't get here. 
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* o The leading combining class from the current character is 0 or the 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* trailing combining class of the previous char was zero. 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* True because the previous call to this function will have always exited 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* that way, and we get called for every char where cc might be non-zero. 1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterate struct 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return normalization status, TRUE for normalization to be done, FALSE 1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* otherwise 1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collPrevIterFCD(collIterate *data) 1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, *start; 1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadingCC; 1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t trailingCC = 0; 1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t fcd; 1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool result = FALSE; 1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start = data->string; 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src = data->pos + 1; 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Get the trailing combining class of the current 
character. */ 1811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src); 1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); 1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (leadingCC != 0) { 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru The current char has a non-zero leading combining class. 1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Scan backward until we find a char with a trailing cc of zero. 1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) 1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start == src) { 1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition = NULL; 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1827b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src); 1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); 1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (trailingCC == 0) { 1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (leadingCC < trailingCC) { 1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = TRUE; 1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); 1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition = (UChar *)src; 1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 184827f654740f2a26ad62a5c155af9199af9e69b889claireho/** gets a code unit from the string at a given offset 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Handles both normal and iterative cases. 1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * No error checking - caller beware! 
1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 185227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline 185327f654740f2a26ad62a5c155af9199af9e69b889clairehoUChar peekCodeUnit(collIterate *source, int32_t offset) { 1854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->pos != NULL) { 1855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->pos + offset); 1856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(source->iterator != NULL) { 185727f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c; 1858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(offset != 0) { 1859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->iterator->move(source->iterator, offset, UITER_CURRENT); 186027f654740f2a26ad62a5c155af9199af9e69b889claireho c = source->iterator->next(source->iterator); 1861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->iterator->move(source->iterator, -offset-1, UITER_CURRENT); 1862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 186327f654740f2a26ad62a5c155af9199af9e69b889claireho c = source->iterator->current(source->iterator); 1864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 186527f654740f2a26ad62a5c155af9199af9e69b889claireho return c >= 0 ? (UChar)c : 0xfffd; // If the caller works properly, we should never see c<0. 1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 186727f654740f2a26ad62a5c155af9199af9e69b889claireho return 0xfffd; 1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 187127f654740f2a26ad62a5c155af9199af9e69b889claireho// Code point version. Treats the offset as a _code point_ delta. 
187227f654740f2a26ad62a5c155af9199af9e69b889claireho// We cannot use U16_FWD_1_UNSAFE and similar because we might not have well-formed UTF-16. 187327f654740f2a26ad62a5c155af9199af9e69b889claireho// We cannot use U16_FWD_1 and similar because we do not know the start and limit of the buffer. 187427f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline 187527f654740f2a26ad62a5c155af9199af9e69b889clairehoUChar32 peekCodePoint(collIterate *source, int32_t offset) { 187627f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c; 187727f654740f2a26ad62a5c155af9199af9e69b889claireho if(source->pos != NULL) { 187827f654740f2a26ad62a5c155af9199af9e69b889claireho const UChar *p = source->pos; 187927f654740f2a26ad62a5c155af9199af9e69b889claireho if(offset >= 0) { 188027f654740f2a26ad62a5c155af9199af9e69b889claireho // Skip forward over (offset-1) code points. 188127f654740f2a26ad62a5c155af9199af9e69b889claireho while(--offset >= 0) { 188227f654740f2a26ad62a5c155af9199af9e69b889claireho if(U16_IS_LEAD(*p++) && U16_IS_TRAIL(*p)) { 188327f654740f2a26ad62a5c155af9199af9e69b889claireho ++p; 188427f654740f2a26ad62a5c155af9199af9e69b889claireho } 188527f654740f2a26ad62a5c155af9199af9e69b889claireho } 188627f654740f2a26ad62a5c155af9199af9e69b889claireho // Read the code point there. 188727f654740f2a26ad62a5c155af9199af9e69b889claireho c = *p++; 188827f654740f2a26ad62a5c155af9199af9e69b889claireho UChar trail; 188927f654740f2a26ad62a5c155af9199af9e69b889claireho if(U16_IS_LEAD(c) && U16_IS_TRAIL(trail = *p)) { 189027f654740f2a26ad62a5c155af9199af9e69b889claireho c = U16_GET_SUPPLEMENTARY(c, trail); 189127f654740f2a26ad62a5c155af9199af9e69b889claireho } 189227f654740f2a26ad62a5c155af9199af9e69b889claireho } else /* offset<0 */ { 189327f654740f2a26ad62a5c155af9199af9e69b889claireho // Skip backward over (offset-1) code points. 
189427f654740f2a26ad62a5c155af9199af9e69b889claireho while(++offset < 0) { 189527f654740f2a26ad62a5c155af9199af9e69b889claireho if(U16_IS_TRAIL(*--p) && U16_IS_LEAD(*(p - 1))) { 189627f654740f2a26ad62a5c155af9199af9e69b889claireho --p; 189727f654740f2a26ad62a5c155af9199af9e69b889claireho } 189827f654740f2a26ad62a5c155af9199af9e69b889claireho } 189927f654740f2a26ad62a5c155af9199af9e69b889claireho // Read the code point before that. 190027f654740f2a26ad62a5c155af9199af9e69b889claireho c = *--p; 190127f654740f2a26ad62a5c155af9199af9e69b889claireho UChar lead; 190227f654740f2a26ad62a5c155af9199af9e69b889claireho if(U16_IS_TRAIL(c) && U16_IS_LEAD(lead = *(p - 1))) { 190327f654740f2a26ad62a5c155af9199af9e69b889claireho c = U16_GET_SUPPLEMENTARY(lead, c); 190427f654740f2a26ad62a5c155af9199af9e69b889claireho } 190527f654740f2a26ad62a5c155af9199af9e69b889claireho } 190627f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(source->iterator != NULL) { 190727f654740f2a26ad62a5c155af9199af9e69b889claireho if(offset >= 0) { 190827f654740f2a26ad62a5c155af9199af9e69b889claireho // Skip forward over (offset-1) code points. 190927f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t fwd = offset; 191027f654740f2a26ad62a5c155af9199af9e69b889claireho while(fwd-- > 0) { 191127f654740f2a26ad62a5c155af9199af9e69b889claireho uiter_next32(source->iterator); 191227f654740f2a26ad62a5c155af9199af9e69b889claireho } 191327f654740f2a26ad62a5c155af9199af9e69b889claireho // Read the code point there. 191427f654740f2a26ad62a5c155af9199af9e69b889claireho c = uiter_current32(source->iterator); 191527f654740f2a26ad62a5c155af9199af9e69b889claireho // Return to the starting point, skipping backward over (offset-1) code points. 
191627f654740f2a26ad62a5c155af9199af9e69b889claireho while(offset-- > 0) { 191727f654740f2a26ad62a5c155af9199af9e69b889claireho uiter_previous32(source->iterator); 191827f654740f2a26ad62a5c155af9199af9e69b889claireho } 191927f654740f2a26ad62a5c155af9199af9e69b889claireho } else /* offset<0 */ { 192027f654740f2a26ad62a5c155af9199af9e69b889claireho // Read backward, reading offset code points, remember only the last-read one. 192127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t back = offset; 192227f654740f2a26ad62a5c155af9199af9e69b889claireho do { 192327f654740f2a26ad62a5c155af9199af9e69b889claireho c = uiter_previous32(source->iterator); 192427f654740f2a26ad62a5c155af9199af9e69b889claireho } while(++back < 0); 192527f654740f2a26ad62a5c155af9199af9e69b889claireho // Return to the starting position, skipping forward over offset code points. 192627f654740f2a26ad62a5c155af9199af9e69b889claireho do { 192727f654740f2a26ad62a5c155af9199af9e69b889claireho uiter_next32(source->iterator); 192827f654740f2a26ad62a5c155af9199af9e69b889claireho } while(++offset < 0); 192927f654740f2a26ad62a5c155af9199af9e69b889claireho } 193027f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 193127f654740f2a26ad62a5c155af9199af9e69b889claireho c = U_SENTINEL; 193227f654740f2a26ad62a5c155af9199af9e69b889claireho } 193327f654740f2a26ad62a5c155af9199af9e69b889claireho return c; 193427f654740f2a26ad62a5c155af9199af9e69b889claireho} 193527f654740f2a26ad62a5c155af9199af9e69b889claireho 1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Determines if we are at the start of the data string in the backwards 1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collation iterator 1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator 1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return TRUE if we are at the start 
1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool isAtStartPrevIterate(collIterate *data) { 1944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->pos == NULL && data->iterator != NULL) { 1945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return !data->iterator->hasPrevious(data->iterator); 1946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //return (collIter_bos(data)) || 1948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (data->pos == data->string) || 1949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ((data->flags & UCOL_ITER_INNORMBUF) && 1950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(data->pos - 1) == 0 && data->fcdPosition == NULL); 1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void goBackOne(collIterate *data) { 1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# if 0 1956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // somehow, it looks like we need to keep iterator synced up 1957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // at all times, as above. 
1958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->pos) { 1959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->pos--; 1960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->iterator) { 1962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->iterator->previous(data->iterator); 1963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) { 1966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->iterator->previous(data->iterator); 1967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->pos) { 1969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->pos --; 1970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inline function that gets a simple CE. 1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* So what it does is that it will first check the expansion buffer. If the 1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* expansion buffer is not empty, ie the end pointer to the expansion buffer 1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* is different from the string pointer, we return the collation element at the 1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* return pointer and decrement it. 
/**
* Inline function that returns the previous collation element (CE) while
* iterating backwards. In order, it:
*    1. Updates the offset bookkeeping (offsetReturn / offsetRepeatCount).
*    2. Hands out a CE that is still pending in the CE buffer, if any
*       (data->toReturn lies above the start of data->extendCEs when that is
*       set, otherwise above data->CEs).
*    3. Otherwise reads the previous code unit from the string, from the
*       character iterator or from the normalization side buffer, runs the
*       incremental FCD check / normalization when needed, and maps the code
*       unit to a CE. Special CEs (values above UCOL_NOT_FOUND, e.g.
*       contractions) are resolved by ucol_prv_getSpecialPrevCE; code units
*       without a mapping fall back to the UCA table and finally to
*       getPrevImplicit.
* @param coll collator data
* @param data collation iterator struct
* @param status error status
* @return the previous CE, or UCOL_NO_MORE_CES when the start of the text has
*         been reached
*/
static
inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                               UErrorCode *status)
{
    uint32_t result = (uint32_t)UCOL_NULLORDER;

    /* Step 1: walk the offset buffer back by one entry, honouring a pending repeat count. */
    if (data->offsetReturn != NULL) {
        if (data->offsetRepeatCount > 0) {
                data->offsetRepeatCount -= 1;
        } else {
            if (data->offsetReturn == data->offsetBuffer) {
                /* Offset buffer used up: mark it empty and rewind the store position. */
                data->offsetReturn = NULL;
                data->offsetStore  = data->offsetBuffer;
            } else {
                data->offsetReturn -= 1;
            }
        }
    }

    /* Step 2: CEs left over from an earlier lookup are returned first, last one first. */
    if ((data->extendCEs && data->toReturn > data->extendCEs) ||
            (!data->extendCEs && data->toReturn > data->CEs))
    {
        data->toReturn -= 1;
        result = *(data->toReturn);
        /* Reached the base of the buffer: pull CEpos back to it as well. */
        if (data->CEs == data->toReturn || data->extendCEs == data->toReturn) {
            data->CEpos = data->toReturn;
        }
    }
    else {
        UChar ch = 0;

        /*
        Step 3. The outer loop repeats while the code unit maps to
        UCOL_IGNORABLE and lies in [UCOL_FIRST_HANGUL, UCOL_LAST_HANGUL];
        such code units are skipped and the one before is fetched.
        */
        do {
            /*
            Loop handles case when incremental normalize switches to or from the
            side buffer / original string, and we need to start again to get the
            next character.
            */
            for (;;) {
                if (data->flags & UCOL_ITER_HASLEN) {
                    /*
                    Normal path for strings when length is specified.
                    Not in side buffer because it is always null terminated.
                    */
                    if (data->pos <= data->string) {
                        /* Going backward, the start of the main source string ends the iteration */
                        return UCOL_NO_MORE_CES;
                    }
                    data->pos --;
                    ch = *data->pos;
                }
                // we are using an iterator to go back. Pray for us!
                else if (data->flags & UCOL_USE_ITERATOR) {
                  UChar32 iterCh = data->iterator->previous(data->iterator);
                  if(iterCh == U_SENTINEL) {
                    /* The iterator has nothing before its current position. */
                    return UCOL_NO_MORE_CES;
                  } else {
                    ch = (UChar)iterCh;
                  }
                }
                else {
                    data->pos --;
                    ch = *data->pos;
                    /* we are in the side buffer. */
                    if (ch == 0) {
                        /*
                        At the start of the normalize side buffer.
                        Go back to string.
                        Because pointer points to the last accessed character,
                        hence we have to increment it by one here.
                        */
                        data->flags = data->origFlags;
                        data->offsetRepeatValue = 0;

                        if (data->fcdPosition == NULL) {
                            /* The normalized run started at the beginning of the string. */
                            data->pos = data->string;
                            return UCOL_NO_MORE_CES;
                        }
                        else {
                            data->pos   = data->fcdPosition + 1;
                        }

                        /* Fetch the next code unit from the original string. */
                        continue;
                    }
                }

                /* Track whether the code unit just read is in the Hiragana block 3040..309F. */
                if(data->flags&UCOL_HIRAGANA_Q) {
                  if(ch>=0x3040 && ch<=0x309f) {
                    data->flags |= UCOL_WAS_HIRAGANA;
                  } else {
                    data->flags &= ~UCOL_WAS_HIRAGANA;
                  }
                }

                /*
                * got a character to determine if there's fcd and/or normalization
                * stuff to do.
                * if the current character is not fcd.
                * if current character is at the start of the string
                * Trailing combining class == 0.
                * Note if pos is in the writablebuffer, norm is always 0
                */
                if (ch < ZERO_CC_LIMIT_ ||
                  // this should propel us out of the loop in the iterator case
                    (data->flags & UCOL_ITER_NORM) == 0 ||
                    (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
                    || data->string == data->pos) {
                    break;
                }

                if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
                    /* if next character is FCD */
                    if (data->pos == data->string) {
                        /* First char of string is always OK for FCD check */
                        break;
                    }

                    /* Not first char of string, do the FCD fast test */
                    if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
                        break;
                    }
                }

                /* Need a more complete FCD check and possible normalization. */
                if (collPrevIterFCD(data)) {
                    collPrevIterNormalize(data);
                }

                if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
                    /*  No normalization. Go ahead and process the char. */
                    break;
                }

                /*
                Some normalization happened.
                Next loop picks up a char from the normalization buffer.
                */
            }

            /* attempt to handle contractions, after removal of the backwards
            contraction
            */
            if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
                result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
            } else {
                if (ch <= 0xFF) {
                    /* Code units up to FF are looked up in the latinOneMapping table. */
                    result = coll->latinOneMapping[ch];
                }
                else {
                    // Always use UCA for [3400..9FFF], [AC00..D7AF]
                    // **** [FA0E..FA2F] ?? ****
                    if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
                        (ch >= 0x3400 && ch <= 0xD7AF)) {
                        if (ch > 0x9FFF && ch < 0xAC00) {
                            // between the two target ranges; do normal lookup
                            // **** this range is YI, Modifier tone letters, ****
                            // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
                            // **** Latin-D might be tailored, so we need to ****
                            // **** do the normal lookup for these guys.     ****
                            result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
                        } else {
                            /* Inside one of the two target ranges: skip the tailoring lookup. */
                            result = UCOL_NOT_FOUND;
                        }
                    } else {
                        result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
                    }
                }
                /* Values above UCOL_NOT_FOUND are special CEs that need further resolution. */
                if (result > UCOL_NOT_FOUND) {
                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
                }
                if (result == UCOL_NOT_FOUND) { // Not found in master list
                    /* NOTE(review): this test reads data->coll while the one above reads coll - confirm both are meant to be the same collator. */
                    if (!isAtStartPrevIterate(data) &&
                        ucol_contractionEndCP(ch, data->coll))
                    {
                        result = UCOL_CONTRACTION;
                    } else {
                        /* Fall back to the UCA mapping when the collator has one attached. */
                        if(coll->UCA) {
                            result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
                        }
                    }

                    if (result > UCOL_NOT_FOUND) {
                        if(coll->UCA) {
                            result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
                        }
                    }
                }
            }
        } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );

        /* Still no mapping after all lookups: let getPrevImplicit produce the CE. */
        if(result == UCOL_NOT_FOUND) {
            result = getPrevImplicit(ch, data);
        }
    }

    return result;
}


/*   ucol_getPrevCE, out-of-line version for use from other files.
*/ 2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint32_t U_EXPORT2 2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getPrevCE(const UCollator *coll, collIterate *data, 2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ucol_IGetPrevCE(coll, data, status); 2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this should be connected to special Jamo handling */ 2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint32_t U_EXPORT2 2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) { 2193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate colIt; 219450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, &u, 1, &colIt, status); 219550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 219650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 219750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 219850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_IGetNextCE(coll, &colIt, status); 2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument character into the end of the buffer pushing back the 2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* null terminator. 
2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate struct data 2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch character to be appended 2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the position of the new addition 2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 220950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline const UChar * insertBufferEnd(collIterate *data, UChar ch) 2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldLength = data->writableBuffer.length(); 221250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return data->writableBuffer.append(ch).getTerminatedBuffer() + oldLength; 2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument string into the end of the buffer pushing back the 2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* null terminator. 
2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate struct data 2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param string to be appended 2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param length of the string to be appended 2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the position of the new addition 2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 222450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline const UChar * insertBufferEnd(collIterate *data, const UChar *str, int32_t length) 2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldLength = data->writableBuffer.length(); 222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return data->writableBuffer.append(str, length).getTerminatedBuffer() + oldLength; 2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Special normalization function for contraction in the forwards iterator. 2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This normalization sequence will place the current character at source->pos 2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and its following normalized sequence into the buffer. 2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The fcd position, pos will be changed. 2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* pos will now point to positions in the buffer. 2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Flags will be changed accordingly. 
2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data 2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizeNextContraction(collIterate *data) 2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 224250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t strsize; 2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* because the pointer points to the next character */ 224550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pStart = data->pos - 1; 224650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pEnd; 2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { 224950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer.setTo(*(pStart - 1)); 2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strsize = 1; 2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 225350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho strsize = data->writableBuffer.length(); 2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pEnd = data->fcdPosition; 2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 225850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer.append( 225950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), status)); 
226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(status)) { 226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 226450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.getTerminatedBuffer() + strsize; 2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->origFlags = data->flags; 2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags |= UCOL_ITER_INNORMBUF; 2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Contraction character management function that returns the next character 2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* for the forwards iterator. 2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Does nothing if the next character is in buffer and not the first character 2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* in it. 2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Else it checks next character in data string to see if it is normalizable. 2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* If it is not, the character is simply copied into the buffer, else 2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the whole normalized substring is copied into the buffer, including the 2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* current character. 
2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data 2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return next character 2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UChar getNextNormalizedChar(collIterate *data) 2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar nextch; 2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar ch; 2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Here we need to add the iterator code. One problem is the way 2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // end of string is handled. If we just return next char, it could 2289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be the sentinel. Most of the cases already check for this, but we 2290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // need to be sure. 2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ) { 2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if no normalization and not in buffer. 
*/ 2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->flags & UCOL_USE_ITERATOR) { 2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar)data->iterator->next(data->iterator); 2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (data->flags & UCOL_ITER_NORM && data->flags & UCOL_USE_ITERATOR) { 2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //normalizeIterator(data); 2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF); 2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((innormbuf && *data->pos != 0) || 2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (data->fcdPosition != NULL && !innormbuf && 2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos < data->fcdPosition)) { 2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if next character is in normalized buffer, no further normalization 2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru is required 2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if 
(data->flags & UCOL_ITER_HASLEN) { 2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* in data string */ 2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->pos + 1 == data->endp) { 2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (innormbuf) { 2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // inside the normalization buffer, but at the end 2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (since we encountered zero). This means, in the 2325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // case we're using char iterator, that we need to 2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do another round of normalization. 2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if(data->origFlags & UCOL_USE_ITERATOR) { 2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we need to restore original flags, 2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // otherwise, we'll lose them 2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //data->flags = data->origFlags; 2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //normalizeIterator(data); 2332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //return *(data->pos++); 2333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} else { 2334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in writable buffer, at this point fcdPosition can not be 2336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pointing to the end of the data string. 
see contracting tag. 2337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->fcdPosition) { 2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*(data->fcdPosition + 1) == 0 || 2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition + 1 == data->endp) { 2341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* at the end of the string, dump it into the normalizer */ 234250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = insertBufferEnd(data, *(data->fcdPosition)) + 1; 2343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check if data->pos received a null pointer 2344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->pos == NULL) { 2345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (UChar)-1; // Return to indicate error. 2346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->fcdPosition ++); 2348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = data->fcdPosition; 2350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(data->origFlags & UCOL_USE_ITERATOR) { 2351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we are here, we're using a normalizing iterator. 2352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we should just continue further. 
2353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags = data->origFlags; 2354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = NULL; 2355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar)data->iterator->next(data->iterator); 2356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 2358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*(data->pos + 1) == 0) { 2361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *data->pos ++; 2367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextch = *data->pos; 2368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the current character is not fcd. 2371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Trailing combining class == 0. 
2372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->fcdPosition == NULL || data->fcdPosition < data->pos) && 2374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ || 2375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) { 2376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Need a more complete FCD check and possible normalization. 2378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalize substring will be appended to buffer 2379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collIterFCD(data)) { 2381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizeNextContraction(data); 2382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (innormbuf) { 2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fcdposition shifted even when there's no normalization, if we 2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru don't input the rest into this, we'll get the wrong position when 2387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru we reach the end of the writableBuffer */ 238850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length = (int32_t)(data->fcdPosition - data->pos + 1); 238950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = insertBufferEnd(data, data->pos - 1, length); 2390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check if data->pos received a null pointer 2391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->pos == NULL) { 
2392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (UChar)-1; // Return to indicate error. 2393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (innormbuf) { 2399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru no normalization is to be done hence only one character will be 2401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appended to the buffer. 2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = insertBufferEnd(data, ch) + 1; 2404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check if data->pos received a null pointer 2405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->pos == NULL) { 2406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (UChar)-1; // Return to indicate error. 
2407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* points back to the pos in string */ 2411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ch; 2412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Function to copy the buffer into writableBuffer and sets the fcd position to 2418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the correct position 2419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param source data string source 2420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param buffer character buffer 2421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 242350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &buffer) 2424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* okay confusing part here. to ensure that the skipped characters are 2426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru considered later, we need to place it in the appropriate position in the 2427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalization buffer and reassign the pos pointer. 
simple case if pos 2428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reside in string, simply copy to normalization buffer and 2429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fcdposition = pos, pos = start of normalization buffer. if pos in 2430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalization buffer, we'll insert the copy infront of pos and point pos 2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru to the start of the normalization buffer. why am i doing these copies? 2432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru well, so that the whole chunk of codes in the getNextCE, ucol_prv_getSpecialCE does 2433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru not require any changes, which be really painful. */ 2434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (source->flags & UCOL_ITER_INNORMBUF) { 243550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t replaceLength = source->pos - source->writableBuffer.getBuffer(); 243650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->writableBuffer.replace(0, replaceLength, buffer); 2437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->fcdPosition = source->pos; 2440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->origFlags = source->flags; 2441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->flags |= UCOL_ITER_INNORMBUF; 2442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR); 244350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->writableBuffer = buffer; 2444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 244650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
source->pos = source->writableBuffer.getTerminatedBuffer(); 2447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Function to get the discontiguos collation element within the source. 2451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Note this function will set the position to the appropriate places. 2452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll current collator used 2453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param source data string source 2454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param constart index to the start character in the contraction table 2455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return discontiguos collation element offset 2456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t getDiscontiguous(const UCollator *coll, collIterate *source, 2459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *constart) 2460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* source->pos currently points to the second combining character after 2462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru the start character */ 246350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *temppos = source->pos; 246450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString buffer; 2465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *tempconstart = constart; 2466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tempflags = source->flags; 
2467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool multicontraction = FALSE; 2468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterateState discState; 2469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backupState(source, &discState); 2471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 247227f654740f2a26ad62a5c155af9199af9e69b889claireho buffer.setTo(peekCodePoint(source, -1)); 2473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 2474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *UCharOffset; 2475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar schar, 2476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tchar; 2477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t result; 2478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp) 248027f654740f2a26ad62a5c155af9199af9e69b889claireho || (peekCodeUnit(source, 0) == 0 && 2481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //|| (*source->pos == 0 && 2482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((source->flags & UCOL_ITER_INNORMBUF) == 0 || 2483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->fcdPosition == NULL || 2484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->fcdPosition == source->endp || 2485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(source->fcdPosition) == 0 || 2486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getCombiningClass(*(source->fcdPosition)) == 0)) || 2487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* end of string in null terminated string or stopped by a 2488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru null character, 
note fcd does not always point to a base 2489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru character after the discontiguos change */ 249027f654740f2a26ad62a5c155af9199af9e69b889claireho u_getCombiningClass(peekCodePoint(source, 0)) == 0) { 2491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //u_getCombiningClass(*(source->pos)) == 0) { 2492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //constart = (UChar *)coll->image + getContractOffset(CE); 2493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (multicontraction) { 2494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->pos = temppos - 1; 249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho setDiscontiguosAttribute(source, buffer); 2496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(coll->contractionCEs + 2497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (tempconstart - coll->contractionIndex)); 2498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru constart = tempconstart; 2500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharOffset = (UChar *)(tempconstart + 1); /* skip the backward offset*/ 2504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru schar = getNextNormalizedChar(source); 2505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (schar > (tchar = *UCharOffset)) { 2507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharOffset++; 2508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru if (schar != tchar) { 2511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not the correct codepoint. we stuff the current codepoint into 2512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru the discontiguos buffer and try the next character */ 251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.append(schar); 2514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_getCombiningClass(schar) == 251827f654740f2a26ad62a5c155af9199af9e69b889claireho u_getCombiningClass(peekCodePoint(source, -2))) { 251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.append(schar); 2520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = *(coll->contractionCEs + 2523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 2524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (result == UCOL_NOT_FOUND) { 2527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (isContraction(result)) { 2529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* this is a multi-contraction*/ 2530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempconstart = (UChar *)coll->image + getContractOffset(result); 2531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*(coll->contractionCEs + (constart - coll->contractionIndex)) 2532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
!= UCOL_NOT_FOUND) { 2533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru multicontraction = TRUE; 2534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temppos = source->pos + 1; 2535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho setDiscontiguosAttribute(source, buffer); 2538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 2539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no problems simply reverting just like that, 2543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if we are in string before getting into this function, points back to 2544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string hence no problem. 2545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if we are in normalization buffer before getting into this function, 2546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru since we'll never use another normalization within this function, we 2547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru know that fcdposition points to a base character. the normalization buffer 2548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru never change, hence this revert works. 
*/ 2549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loadState(source, &discState, TRUE); 2550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goBackOne(source); 2551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //source->pos = temppos - 1; 2553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->flags = tempflags; 2554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(coll->contractionCEs + (constart - coll->contractionIndex)); 2555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* now uses Mark's getImplicitPrimary code */ 2558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) { 2560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t r = uprv_uca_getImplicitPrimary(cp); 2561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) | 0x000000C0; 2562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->offsetRepeatCount += 1; 2563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (r & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order' 2564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument character into the front of the buffer replacing the 2568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* front null terminator. 
2569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data 2570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch character to be appended 2571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 257350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline void insertBufferFront(collIterate *data, UChar ch) 2574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 257550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.setCharAt(0, ch).insert(0, (UChar)0).getTerminatedBuffer() + 2; 2576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Special normalization function for contraction in the previous iterator. 2580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This normalization sequence will place the current character at source->pos 2581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and its following normalized sequence into the buffer. 2582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The fcd position, pos will be changed. 2583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* pos will now point to positions in the buffer. 2584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Flags will be changed accordingly. 
2585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data 2586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizePrevContraction(collIterate *data, UErrorCode *status) 2589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 259050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pEnd = data->pos + 1; /* End normalize + 1 */ 259150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pStart; 2592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString endOfBuffer; 2594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->flags & UCOL_ITER_HASLEN) { 2595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalization buffer not used yet, we'll pull down the next 2597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru character into the end of the buffer 2598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 259950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho endOfBuffer.setTo(*pEnd); 2600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 260250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho endOfBuffer.setTo(data->writableBuffer, 1); // after the leading NUL 2603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->fcdPosition == NULL) { 2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStart = data->string; 2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStart = data->fcdPosition + 1; 2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t normLen = 261250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), 261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer, 261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status). 261550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length(); 261650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 261750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this puts the null termination infront of the normalized string instead 2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru of the end 2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 262350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = 262450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminatedBuffer() + 262550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1 + normLen; 2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->origFlags = data->flags; 2627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags |= UCOL_ITER_INNORMBUF; 2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 2629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 
2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Contraction character management function that returns the previous character 2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* for the backwards iterator. 2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Does nothing if the previous character is in buffer and not the first 2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* character in it. 2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Else it checks previous character in data string to see if it is 2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* normalizable. 2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* If it is not, the character is simply copied into the buffer, else 2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the whole normalized substring is copied into the buffer, including the 2640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* current character. 
2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data 2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return previous character 2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status) 2646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar prevch; 2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar ch; 264950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *start; 2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF); 2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 || 2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (innormbuf && *(data->pos - 1) != 0)) { 2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if no normalization. 
2655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if previous character is in normalized buffer, no further normalization 2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru is required 2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->flags & UCOL_USE_ITERATOR) { 2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->move(data->iterator, -1, UITER_CURRENT); 2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar)data->iterator->next(data->iterator); 2661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos - 1); 2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start = data->pos; 2667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((data->fcdPosition==NULL)||(data->flags & UCOL_ITER_HASLEN)) { 2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* in data string */ 2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((start - 1) == data->string) { 2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(start - 1); 2671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start --; 2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *start; 2674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevch = *(start - 1); 2675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 
2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in writable buffer, at this point fcdPosition can not be NULL. 2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru see contracting tag. 2680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->fcdPosition == data->string) { 2682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* at the start of the string, just dump it into the normalizer */ 268350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho insertBufferFront(data, *(data->fcdPosition)); 2684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition = NULL; 2685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos - 1); 2686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start = data->fcdPosition; 2688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *start; 2689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevch = *(start - 1); 2690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the current character is not fcd. 2693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Trailing combining class == 0. 
2694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->fcdPosition > start && 2696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (ch >= NFC_ZERO_CC_BLOCK_LIMIT_ || prevch >= NFC_ZERO_CC_BLOCK_LIMIT_)) 2697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 2698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Need a more complete FCD check and possible normalization. 2700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalize substring will be appended to buffer 2701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 270250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *backuppos = data->pos; 2703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = start; 2704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collPrevIterFCD(data)) { 2705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizePrevContraction(data, status); 2706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos - 1); 2707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = backuppos; 2709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition ++; 2710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (innormbuf) { 2713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru no normalization is to be done hence only one character will be 2715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appended to the buffer. 
2716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 271750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho insertBufferFront(data, ch); 2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition --; 2719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ch; 2722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This function handles the special CEs like contractions, expansions, surrogates, Thai */ 2725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* It is called by getNextCE */ 2726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2727b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* The following should be even */ 2728b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define UCOL_MAX_DIGITS_FOR_NUMBER 254 2729b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, collIterate *source, UErrorCode *status) { 2731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState entryState; 2732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &entryState); 2733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 cp = ch; 2734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 2736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This loop will repeat only in the case of contractions, and only when a contraction 2737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // is found and the first CE resulting from 
that contraction is itself a special 2738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (an expansion, for example.) All other special CE types are fully handled the 2739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // first time through, and the loop exits. 2740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const uint32_t *CEOffset = NULL; 2742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch(getCETag(CE)) { 2743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case NOT_FOUND_TAG: 2744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This one is not found, and we'll let somebody else bother about it... no more games */ 2745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 2746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SPEC_PROC_TAG: 2747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Special processing is getting a CE that is preceded by a certain prefix 2749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Currently this is only needed for optimizing Japanese length and iteration marks. 2750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // When we encouter a special processing tag, we go backwards and try to see if 2751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we have a match. 2752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Contraction tables are used - so the whole process is not unlike contraction. 2753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // prefix data is stored backwards in the table. 
2754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset; 2755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar, tchar; 2756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState prefixState; 2757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &prefixState); 2758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &entryState, TRUE); 2759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); // We want to look at the point where we entered - actually one 2760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // before that... 2761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 2763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This loop will run once per source string character, for as long as we 2764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are matching a potential contraction sequence 2765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // First we position ourselves at the begining of contraction sequence 2767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE); 2768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collIter_bos(source)) { 2769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); 2770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = getPrevNormalizedChar(source, status); 2773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 
2774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ 2776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset++; 2777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (schar == tchar) { 2780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Found the source string char in the table. 2781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pick up the corresponding CE from the table. 2782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 2784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 2786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Source string char was not in the table. 2788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have not found the prefix. 
2789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (ContractionStart - coll->contractionIndex)); 2791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isPrefix(CE)) { 2794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The source string char was in the contraction table, and the corresponding 2795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE is not a prefix CE. We found the prefix, break 2796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // out of loop, this CE will end up being returned. This is the normal 2797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // way out of prefix handling when the source actually contained 2798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the prefix. 
2799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != UCOL_NOT_FOUND) { // we found something and we can merilly continue 2803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &prefixState, TRUE); 2804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->origFlags & UCOL_USE_ITERATOR) { 2805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags = source->origFlags; 2806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // prefix search was a failure, we have to backup all the way to the start 2808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &entryState, TRUE); 2809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CONTRACTION_TAG: 2813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This should handle contractions */ 2815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState state; 2816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &state); 2817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t firstCE = *(coll->contractionCEs + ((UChar *)coll->image+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND; 2818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset; 2819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar, tchar; 
2820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 2822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This loop will run once per source string character, for as long as we */ 2823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* are matching a potential contraction sequence */ 2824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* First we position ourselves at the begining of contraction sequence */ 2826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE); 2827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collIter_eos(source)) { 2829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Ran off the end of the source string. 2830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); 2831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // So we'll pick whatever we have at the point... 2832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (CE == UCOL_NOT_FOUND) { 2833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // back up the source over all the chars we scanned going into this contraction. 
2834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = firstCE; 2835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, TRUE); 2836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->origFlags & UCOL_USE_ITERATOR) { 2837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags = source->origFlags; 2838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t maxCC = (uint8_t)(*(UCharOffset)&0xFF); /*get the discontiguos stuff */ /* skip the backward offset, see above */ 2844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8); 2845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = getNextNormalizedChar(source); 2847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ 2848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset++; 2849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (schar == tchar) { 2852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Found the source string char in the contraction table. 2853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pick up the corresponding CE from the table. 
2854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 2856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 2858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Source string char was not in contraction table. 2860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Unless we have a discontiguous contraction, we have finished 2861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // with this contraction. 2862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // in order to do the proper detection, we 2863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // need to see if we're dealing with a supplementary 2864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We test whether the next two char are surrogate pairs. 2865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * This test is done if the iterator is not NULL. 2866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * If there is no surrogate pair, the iterator 2867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * goes back one if needed. 
*/ 2868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 miss = schar; 2869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->iterator) { 2870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 surrNextChar; /* the next char in the iteration to test */ 2871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t prevPos; /* holds the previous position before move forward of the source iterator */ 2872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U16_IS_LEAD(schar) && source->iterator->hasNext(source->iterator)) { 2873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prevPos = source->iterator->index; 2874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru surrNextChar = getNextNormalizedChar(source); 2875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_TRAIL(surrNextChar)) { 2876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru miss = U16_GET_SUPPLEMENTARY(schar, surrNextChar); 2877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (prevPos < source->iterator->index){ 2878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (U16_IS_LEAD(schar)) { 2882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source)); 2883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t sCC; 2886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (miss < 0x300 || 2887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxCC == 0 || 
2888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (sCC = i_getCombiningClass(miss, coll)) == 0 || 2889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCC>maxCC || 2890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (allSame != 0 && sCC == maxCC) || 2891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIter_eos(source)) 2892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Contraction can not be discontiguous. 2894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); // back up the source string by one, 2895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // because the character we just looked at was 2896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // not part of the contraction. */ 2897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_IS_SUPPLEMENTARY(miss)) { 2898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (ContractionStart - coll->contractionIndex)); 2902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 2903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Contraction is possibly discontiguous. 
2905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Scan more of source string looking for a match 2906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar tempchar; 2908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* find the next character if schar is not a base character 2909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru and we are not yet at the end of the string */ 2910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempchar = getNextNormalizedChar(source); 2911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // probably need another supplementary thingie here 2912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (i_getCombiningClass(tempchar, coll) == 0) { 2914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_IS_SUPPLEMENTARY(miss)) { 2916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Spit out the last char of the string, wasn't tasty enough */ 2919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (ContractionStart - coll->contractionIndex)); 2921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 2922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = getDiscontiguous(coll, source, ContractionStart); 2923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } // else after if(schar == tchar) 
2926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE == UCOL_NOT_FOUND) { 2928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* The Source string did not match the contraction that we were checking. */ 2929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Back up the source position to undo the effects of having partially */ 2930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* scanned through what ultimately proved to not be a contraction. */ 2931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, TRUE); 2932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = firstCE; 2933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContraction(CE)) { 2937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The source string char was in the contraction table, and the corresponding 2938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE is not a contraction CE. We completed the contraction, break 2939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // out of loop, this CE will end up being returned. This is the normal 2940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // way out of contraction handling when the source actually contained 2941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the contraction. 
2942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The source string char was in the contraction table, and the corresponding 2947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE is IS a contraction CE. We will continue looping to check the source 2948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // string for the remaining chars in the contraction. 2949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex)); 2950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tempCE != UCOL_NOT_FOUND) { 2951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have scanned a a section of source string for which there is a 2952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE from the contraction table. Remember the CE and scan position, so 2953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that we can return to this point if further scanning fails to 2954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // match a longer contraction sequence. 
2955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru firstCE = tempCE; 2956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &state); 2959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru getNextNormalizedChar(source); 2960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Another way to do this is: 2962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //collIterateState tempState; 2963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //backupState(source, &tempState); 2964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //goBackOne(source); 2965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //backupState(source, &state); 2966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //loadState(source, &tempState, TRUE); 2967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The problem is that for incomplete contractions we have to remember the previous 2969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // position. Before, the only thing I needed to do was state.pos--; 2970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // After iterator introduction and especially after introduction of normalizing 2971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // iterators, it became much more difficult to decrease the saved state. 2972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // I'm not yet sure which of the two methods above is faster. 
2973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } // for(;;) 2975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } // case CONTRACTION_TAG: 2977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case LONG_PRIMARY_TAG: 2978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER; 2980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON; 2981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetRepeatCount += 1; 2982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 2983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case EXPANSION_TAG: 2985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 2986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This should handle expansion. */ 2987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* NOTE: we can encounter both continuations and expansions in an expansion! 
*/ 2988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* I have to decide where continuations are going to be dealt with */ 2989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t size; 2990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i; /* general counter */ 2991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */ 2993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru size = getExpansionCount(CE); 2994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *CEOffset++; 299527f654740f2a26ad62a5c155af9199af9e69b889claireho //source->offsetRepeatCount = -1; 2996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */ 2998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 1; i<size; i++) { 2999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = *CEOffset++; 300027f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetRepeatCount += 1; 3001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* else, we do */ 3003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(*CEOffset != 0) { 3004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = *CEOffset++; 300527f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetRepeatCount += 1; 3006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 
3010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case DIGIT_TAG: 3012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 3014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We do a check to see if we want to collate digits as numbers; if so we generate 3015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru a custom collation key. Otherwise we pull out the value stored in the expansion table. 3016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //uint32_t size; 3018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i; /* general counter */ 3019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->coll->numericCollation == UCOL_ON){ 3021b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collIterateState digitState = {0,0,0,0,0,0,0,0,0}; 3022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 char32 = 0; 3023b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t digVal = 0; 3024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t digIndx = 0; 3026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t endIndex = 0; 3027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t trailingZeroIndex = 0; 3028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t collateVal = 0; 3030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool nonZeroValReached = FALSE; 3032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
3033b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3]; // I just need a temporary place to store my generated CEs. 3034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We parse the source string until we hit a char that's NOT a digit. 3036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Use this u_charDigitValue. This might be slow because we have to 3037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru handle surrogates... 3038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_LEAD(ch)){ 3041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!collIter_eos(source)) { 3042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &digitState); 3043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar trail = getNextNormalizedChar(source); 3044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 3045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = U16_GET_SUPPLEMENTARY(ch, trail); 3046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &digitState, TRUE); 3048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char32 = ch; 
3055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digVal = u_charDigitValue(char32); 3057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digVal = u_charDigitValue(cp); // if we have arrived here, we have 3059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // already processed possible supplementaries that trigered the digit tag - 3060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // all supplementaries are marked in the UCA. 3061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We pad a zero in front of the first element anyways. This takes 3063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru care of the (probably) most common case where people are sorting things followed 3064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru by a single digit 3065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx++; 3067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;){ 3068b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Make sure we have enough space. No longer needed; 3069b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // at this point digIndx now has a max value of UCOL_MAX_DIGITS_FOR_NUMBER 3070b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // (it has been pre-incremented) so we just ensure that numTempBuf is big enough 3071b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3). 3072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Skipping over leading zeroes. 
3074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (digVal != 0) { 3075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru nonZeroValReached = TRUE; 3076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (nonZeroValReached) { 3078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We parse the digit string into base 100 numbers (this fits into a byte). 3080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We only add to the buffer in twos, thus if we are parsing an odd character, 3081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru that serves as the 'tens' digit while the if we are parsing an even one, that 3082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into 3083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid 3084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less 3085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru than all the other bytes. 3086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (digIndx % 2 == 1){ 3089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collateVal += (uint8_t)digVal; 3090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We don't enter the low-order-digit case unless we've already seen 3092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the high order, or for the first digit, which is always non-zero. 
3093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collateVal != 0) 3094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroIndex = 0; 3095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; 3097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collateVal = 0; 3098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else{ 3100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We drop the collation value into the buffer so if we need to do 3101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a "front patch" we don't have to check to see if we're hitting the 3102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // last element. 3103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collateVal = (uint8_t)(digVal * 10); 3104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for trailing zeroes. 
3106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collateVal == 0) 3107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!trailingZeroIndex) 3109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroIndex = (digIndx/2) + 2; 3110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 3112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroIndex = 0; 3113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; 3115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx++; 3117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Get next character. 
3120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!collIter_eos(source)){ 3121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ch = getNextNormalizedChar(source); 3122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_LEAD(ch)){ 3123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!collIter_eos(source)) { 3124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &digitState); 3125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar trail = getNextNormalizedChar(source); 3126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 3127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = U16_GET_SUPPLEMENTARY(ch, trail); 3128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &digitState, TRUE); 3130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((digVal = u_charDigitValue(char32)) == -1 || digIndx > UCOL_MAX_DIGITS_FOR_NUMBER){ 3138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Resetting position to point to the next unprocessed char. We 3139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // overshot it when doing our test/set for numbers. 3140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (char32 > 0xFFFF) { // For surrogates. 
3141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &digitState, TRUE); 3142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //goBackOne(source); 3143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 3145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (nonZeroValReached == FALSE){ 3153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx = 2; 3154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[2] = 6; 3155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ; 3158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (digIndx % 2 != 0){ 3159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We missed a value. Since digIndx isn't even, stuck too many values into the buffer (this is what 3161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward. 
3162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a 3163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru single pass and optimizes for strings with single digits. I'm just assuming that's the more common case. 3164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 2; i < endIndex; i++){ 3167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[i] = (((((numTempBuf[i] - 6)/2) % 10) * 10) + 3168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6; 3169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru --digIndx; 3171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Subtract one off of the last byte. 3174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[endIndex-1] -= 1; 3175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We want to skip over the first two slots in the buffer. The first slot 3178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the 3179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sign/exponent byte: 0x80 + (decimalPos/2) & 7f. 
3180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[0] = UCOL_CODAN_PLACEHOLDER; 3182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F)); 3183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Now transfer the collation key to our collIterate struct. 3185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The total size for our collation key is endIndx bumped up to the next largest even value divided by two. 3186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //size = ((endIndex+1) & ~1)/2; 3187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight 3188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight 3189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_BYTE_COMMON; // Tertiary weight. 3190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = 2; // Reset the index into the buffer. 
3191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i < endIndex) 3192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t primWeight = numTempBuf[i++] << 8; 3194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( i < endIndex) 3195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primWeight |= numTempBuf[i++]; 3196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER; 3197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // no numeric mode, we'll just switch to whatever we stashed and continue 3201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */ 3202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *CEOffset++; 3203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 3206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* various implicits optimization */ 3208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case IMPLICIT_TAG: /* everything that is not defined otherwise */ 3209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* UCA is filled with these. 
Tailorings are NOT_FOUND */ 3210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getImplicit(cp, source); 3211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/ 3212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImplicit 3213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getImplicit(cp, source); 3214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/ 3215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t 3217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7; 3218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t LCount = 19; 3219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t VCount = 21; 3220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t TCount = 28; 3221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t NCount = VCount * TCount; // 588 3222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t SCount = LCount * NCount; // 11172 3223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t L = ch - SBase; 3224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // divide into pieces 3226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t T = L % TCount; // we do it in this order since some compilers can do % and / in one operation 3228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L /= TCount; 
3229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t V = L % VCount; 3230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L /= VCount; 3231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // offset them 3233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L += LBase; 3235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru V += VBase; 3236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru T += TBase; 3237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // return the first CE, but first put the rest into the expansion buffer 3239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!source->coll->image->jamoSpecial) { // FAST PATH 3240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V); 3242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (T != TBase) { 3243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T); 3244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UTRIE_GET32_FROM_LEAD(&coll->mapping, L); 3247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // Jamo is Special 3249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Since Hanguls pass the FCD check, it is 3250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // guaranteed that we won't be in 3251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the 
normalization buffer if something like this happens 3252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 3253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // However, if we are using a uchar iterator and normalization 3254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // is ON, the Hangul that lead us here is going to be in that 3255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // normalization buffer. Here we want to restore the uchar 3256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // iterator state and pull out of the normalization buffer 3257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) { 3258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags = source->origFlags; // restore the iterator 3259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->pos = NULL; 3260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 3262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Move Jamos into normalization buffer 326350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *buffer = source->writableBuffer.getBuffer(4); 326450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t bufferLength; 326550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer[0] = (UChar)L; 326650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer[1] = (UChar)V; 3267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (T != TBase) { 326850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer[2] = (UChar)T; 326950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bufferLength = 3; 3270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 327150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bufferLength = 2; 3272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 327350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
source->writableBuffer.releaseBuffer(bufferLength); 3274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Indicate where to continue in main input string after exhausting the writableBuffer 3276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho source->fcdPosition = source->pos; 3277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->pos = source->writableBuffer.getTerminatedBuffer(); 3279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->origFlags = source->flags; 3280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags |= UCOL_ITER_INNORMBUF; 3281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(UCOL_IGNORABLE); 3284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SURROGATE_TAG: 3287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we encountered a leading surrogate. We shall get the CE by using the following code unit */ 3288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* two things can happen here: next code point can be a trailing surrogate - we will use it */ 3289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* to retrieve the CE, or it is not a trailing surrogate (or the string is done). In that case */ 329027f654740f2a26ad62a5c155af9199af9e69b889claireho /* we treat it like an unassigned code point. 
*/ 3291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar trail; 3293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState state; 3294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &state); 3295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) { 3296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we chould have stepped one char forward and it might have turned that it 3297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // was not a trail surrogate. In that case, we have to backup. 3298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, TRUE); 329927f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 3300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* TODO: CE contain the data from the previous CE + the mask. It should at least be unmasked */ 3302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFFFF, trail); 3303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates in this block, but not this one. 
3304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We need to backup 3305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, TRUE); 3306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 3307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // calculate the supplementary code point value, if surrogate was not tailored 3309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000)); 3310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case LEAD_SURROGATE_TAG: /* D800-DBFF*/ 3314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar nextChar; 3315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if( source->flags & UCOL_USE_ITERATOR) { 3316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source->iterator))) { 3317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cp = U16_GET_SUPPLEMENTARY(ch, nextChar); 3318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->iterator->next(source->iterator); 3319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getImplicit(cp, source); 3320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) || (source->pos<source->endp)) && 332227f654740f2a26ad62a5c155af9199af9e69b889claireho U_IS_TRAIL((nextChar=*source->pos))) { 332327f654740f2a26ad62a5c155af9199af9e69b889claireho cp = U16_GET_SUPPLEMENTARY(ch, nextChar); 
332427f654740f2a26ad62a5c155af9199af9e69b889claireho source->pos++; 332527f654740f2a26ad62a5c155af9199af9e69b889claireho return getImplicit(cp, source); 3326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 332727f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 3328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/ 332927f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; /* broken surrogate sequence */ 3330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CHARSET_TAG: 3331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* not yet implemented */ 3332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* probably after 1.8 */ 3333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_NOT_FOUND; 3334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: 3335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INTERNAL_PROGRAM_ERROR; 3336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE=0; 3337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (CE <= UCOL_NOT_FOUND) break; 3340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CE; 3342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* now uses Mark's getImplicitPrimary code */ 3346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 3347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) { 
3348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t r = uprv_uca_getImplicitPrimary(cp); 3349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505; 3351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->toReturn = collationSource->CEpos; 3352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 335327f654740f2a26ad62a5c155af9199af9e69b889claireho // **** doesn't work if using iterator **** 335427f654740f2a26ad62a5c155af9199af9e69b889claireho if (collationSource->flags & UCOL_ITER_INNORMBUF) { 335527f654740f2a26ad62a5c155af9199af9e69b889claireho collationSource->offsetRepeatCount = 1; 335627f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 335727f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t firstOffset = (int32_t)(collationSource->pos - collationSource->string); 3358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 335927f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode = U_ZERO_ERROR; 336027f654740f2a26ad62a5c155af9199af9e69b889claireho collationSource->appendOffset(firstOffset, errorCode); 336127f654740f2a26ad62a5c155af9199af9e69b889claireho collationSource->appendOffset(firstOffset + 1, errorCode); 3362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 336327f654740f2a26ad62a5c155af9199af9e69b889claireho collationSource->offsetReturn = collationSource->offsetStore - 1; 336427f654740f2a26ad62a5c155af9199af9e69b889claireho *(collationSource->offsetBuffer) = firstOffset; 336527f654740f2a26ad62a5c155af9199af9e69b889claireho if (collationSource->offsetReturn == collationSource->offsetBuffer) { 336627f654740f2a26ad62a5c155af9199af9e69b889claireho collationSource->offsetStore = collationSource->offsetBuffer; 336727f654740f2a26ad62a5c155af9199af9e69b889claireho } 336827f654740f2a26ad62a5c155af9199af9e69b889claireho } 
3369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return ((r & 0x0000FFFF)<<16) | 0x000000C0; 3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 3374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function handles the special CEs like contractions, expansions, 3375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * surrogates, Thai. 3376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is called by both getPrevCE 3377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 3378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, 3379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate *source, 3380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 3381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const uint32_t *CEOffset = NULL; 3383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *UCharOffset = NULL; 3384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar; 3385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *constart = NULL; 3386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t size; 3387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar buffer[UCOL_MAX_BUFFER]; 3388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *endCEBuffer; 3389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *strbuffer; 3390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t noChars = 0; 3391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t CECount = 
0; 3392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) 3394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 3395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* the only ces that loops are thai and contractions */ 3396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch (getCETag(CE)) 3397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case NOT_FOUND_TAG: /* this tag always returns */ 3399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 3400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SPEC_PROC_TAG: 3402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Special processing is getting a CE that is preceded by a certain prefix 3404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Currently this is only needed for optimizing Japanese length and iteration marks. 3405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // When we encouter a special processing tag, we go backwards and try to see if 3406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we have a match. 3407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Contraction tables are used - so the whole process is not unlike contraction. 3408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // prefix data is stored backwards in the table. 
3409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset; 3410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar, tchar; 3411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState prefixState; 3412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &prefixState); 3413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 3414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This loop will run once per source string character, for as long as we 3415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are matching a potential contraction sequence 3416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // First we position ourselves at the begining of contraction sequence 3418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE); 3419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collIter_bos(source)) { 3421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); 3422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = getPrevNormalizedChar(source, status); 3425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 3426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ 3428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
UCharOffset++; 3429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (schar == tchar) { 3432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Found the source string char in the table. 3433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pick up the corresponding CE from the table. 3434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 3435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 3436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 3438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if there is a completely ignorable code point in the middle of 3440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a prefix, we need to act as if it's not there 3441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero) 3442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // lone surrogates cannot be set to zero as it would break other processing 3443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar); 3444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // it's easy for BMP code points 3445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isZeroCE == 0) { 3446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 344727f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(U16_IS_SURROGATE(schar)) { 3448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // for supplementary code points, we have to check the next one 
3449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // situations where we are going to ignore 3450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1. beginning of the string: schar is a lone surrogate 3451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2. schar is a lone surrogate 3452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 3. schar is a trail surrogate in a valid surrogate sequence 3453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that is explicitly set to zero. 3454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!collIter_bos(source)) { 3455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar lead; 345627f654740f2a26ad62a5c155af9199af9e69b889claireho if(!U16_IS_SURROGATE_LEAD(schar) && U16_IS_LEAD(lead = getPrevNormalizedChar(source, status))) { 3457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, lead); 345827f654740f2a26ad62a5c155af9199af9e69b889claireho if(isSpecial(isZeroCE) && getCETag(isZeroCE) == SURROGATE_TAG) { 3459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar); 3460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(finalCE == 0) { 3461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // this is a real, assigned completely ignorable code point 3462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 3463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 3464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 346727f654740f2a26ad62a5c155af9199af9e69b889claireho // lone surrogate, treat like unassigned 346827f654740f2a26ad62a5c155af9199af9e69b889claireho return 
UCOL_NOT_FOUND; 3469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3470c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 347127f654740f2a26ad62a5c155af9199af9e69b889claireho // lone surrogate at the beggining, treat like unassigned 347227f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 3473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Source string char was not in the table. 3476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have not found the prefix. 3477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 3478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (ContractionStart - coll->contractionIndex)); 3479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isPrefix(CE)) { 3482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The source string char was in the contraction table, and the corresponding 3483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE is not a prefix CE. We found the prefix, break 3484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // out of loop, this CE will end up being returned. This is the normal 3485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // way out of prefix handling when the source actually contained 3486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the prefix. 
3487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &prefixState, TRUE); 3491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 349450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case CONTRACTION_TAG: { 3495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* to ensure that the backwards and forwards iteration matches, we 3496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru take the current region of most possible match and pass it through 3497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru the forward iteration. this will ensure that the obstinate problem of 3498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru overlapping contractions will not occur. 
3499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 350027f654740f2a26ad62a5c155af9199af9e69b889claireho schar = peekCodeUnit(source, 0); 3501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru constart = (UChar *)coll->image + getContractOffset(CE); 3502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isAtStartPrevIterate(source) 3503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* commented away contraction end checks after adding the checks 3504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru in getPrevCE */) { 3505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* start of string or this is not the end of any contraction */ 3506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 3507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (constart - coll->contractionIndex)); 3508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strbuffer = buffer; 3511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1); 3512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(UCharOffset --) = 0; 3513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru noChars = 0; 3514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // have to swap thai characters 3515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (ucol_unsafeCP(schar, coll)) { 3516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(UCharOffset) = schar; 3517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru noChars++; 3518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset --; 3519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = getPrevNormalizedChar(source, status); 
3520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 3521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: when we exhaust the contraction buffer, 3522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // it needs to get reallocated. The problem is 3523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that the size depends on the string which is 3524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // not iterated over. However, since we're travelling 3525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // backwards, we already had to set the iterator at 3526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the end - so we might as well know where we are? 3527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (UCharOffset + 1 == buffer) { 3528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we have exhausted the buffer */ 3529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t newsize = 0; 3530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->pos) { // actually dealing with a position 353150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newsize = (int32_t)(source->pos - source->string + 1); 3532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // iterator 3533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newsize = 4 * UCOL_MAX_BUFFER; 3534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strbuffer = (UChar *)uprv_malloc(sizeof(UChar) * 3536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (newsize + UCOL_MAX_BUFFER)); 3537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* test for NULL */ 3538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strbuffer == NULL) { 3539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = 
U_MEMORY_ALLOCATION_ERROR; 3540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_NO_MORE_CES; 3541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset = strbuffer + newsize; 3543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(UCharOffset, buffer, 3544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_MAX_BUFFER * sizeof(UChar)); 3545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset --; 3546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((source->pos && (source->pos == source->string || 3548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ((source->flags & UCOL_ITER_INNORMBUF) && 3549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->pos - 1) == 0 && source->fcdPosition == NULL))) 3550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (source->iterator && !source->iterator->hasPrevious(source->iterator))) { 3551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* adds the initial base character to the string */ 3555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(UCharOffset) = schar; 3556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru noChars++; 3557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t offsetBias; 3559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // **** doesn't work if using iterator **** 3561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->flags & 
UCOL_ITER_INNORMBUF) { 3562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru offsetBias = -1; 3563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru offsetBias = (int32_t)(source->pos - source->string); 3565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* a new collIterate is used to simplify things, since using the current 3568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate will mean that the forward and backwards iteration will 3569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru share and change the same buffers. we don't want to get into that. */ 3570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate temp; 3571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t rawOffset; 3572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, UCharOffset, noChars, &temp, status); 357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_NULLORDER; 357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru temp.flags &= ~UCOL_ITER_NORM; 3578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru temp.flags |= source->flags & UCOL_FORCE_HAN_IMPLICIT; 3579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 358050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho rawOffset = (int32_t)(temp.pos - temp.string); // should always be zero? 
3581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &temp, status); 3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->extendCEs) { 3584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endCEBuffer = source->extendCEs + source->extendCEsSize; 358550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CECount = (int32_t)((source->CEpos - source->extendCEs)/sizeof(uint32_t)); 3586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE; 358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CECount = (int32_t)((source->CEpos - source->CEs)/sizeof(uint32_t)); 3589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (CE != UCOL_NO_MORE_CES) { 3592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos ++) = CE; 3593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (offsetBias >= 0) { 359527f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(rawOffset + offsetBias, *status); 3596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CECount++; 3599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->CEpos == endCEBuffer) { 3600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* ran out of CE space, reallocate to new buffer. 
3601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru If reallocation fails, reset pointers and bail out, 3602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru there's no guarantee of the right character position after 3603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru this bail*/ 360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!increaseCEsCapacity(source)) { 3605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 360627f654740f2a26ad62a5c155af9199af9e69b889claireho break; 3607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endCEBuffer = source->extendCEs + source->extendCEsSize; 3610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) { 361350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho rawOffset = (int32_t)(temp.fcdPosition - temp.string); 3614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 361550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho rawOffset = (int32_t)(temp.pos - temp.string); 3616b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3617b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &temp, status); 3619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 362127f654740f2a26ad62a5c155af9199af9e69b889claireho if (strbuffer != buffer) { 362227f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(strbuffer); 362327f654740f2a26ad62a5c155af9199af9e69b889claireho } 362427f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(*status)) { 
362527f654740f2a26ad62a5c155af9199af9e69b889claireho return (uint32_t)UCOL_NULLORDER; 362627f654740f2a26ad62a5c155af9199af9e69b889claireho } 362727f654740f2a26ad62a5c155af9199af9e69b889claireho 362827f654740f2a26ad62a5c155af9199af9e69b889claireho if (source->offsetRepeatValue != 0) { 3629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (CECount > noChars) { 363027f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetRepeatCount += temp.offsetRepeatCount; 3631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // **** does this really skip the right offsets? **** 3633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetReturn -= (noChars - CECount); 3634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (offsetBias >= 0) { 3638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetReturn = source->offsetStore - 1; 3639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->offsetReturn == source->offsetBuffer) { 3640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetStore = source->offsetBuffer; 3641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos - 1; 3645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->toReturn == source->CEs) { 3646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->CEpos = source->CEs; 3647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
3648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->toReturn); 365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case LONG_PRIMARY_TAG: 3652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON; 3654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER; 3655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos - 1; 3656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 365727f654740f2a26ad62a5c155af9199af9e69b889claireho if (source->flags & UCOL_ITER_INNORMBUF) { 3658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetRepeatCount = 1; 365927f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 366027f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t firstOffset = (int32_t)(source->pos - source->string); 3661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 366227f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset, *status); 366327f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset + 1, *status); 3664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 366527f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetReturn = source->offsetStore - 1; 366627f654740f2a26ad62a5c155af9199af9e69b889claireho *(source->offsetBuffer) = firstOffset; 366727f654740f2a26ad62a5c155af9199af9e69b889claireho if (source->offsetReturn == source->offsetBuffer) { 366827f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetStore = source->offsetBuffer; 366927f654740f2a26ad62a5c155af9199af9e69b889claireho } 
367027f654740f2a26ad62a5c155af9199af9e69b889claireho } 3671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->toReturn); 3674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case EXPANSION_TAG: /* this tag always returns */ 3677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru This should handle expansion. 3680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NOTE: we can encounter both continuations and expansions in an expansion! 3681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru I have to decide where continuations are going to be dealt with 3682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t firstOffset = (int32_t)(source->pos - source->string); 3684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // **** doesn't work if using iterator **** 3686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->offsetReturn != NULL) { 3687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (! 
(source->flags & UCOL_ITER_INNORMBUF) && source->offsetReturn == source->offsetBuffer) { 3688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetStore = source->offsetBuffer; 3689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }else { 3690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru firstOffset = -1; 3691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* find the offset to expansion table */ 3695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE); 3696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru size = getExpansionCount(CE); 3697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (size != 0) { 3698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if there are less than 16 elements in expansion, we don't terminate 3700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t count; 3702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (count = 0; count < size; count++) { 3704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos ++) = *CEOffset++; 3705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (firstOffset >= 0) { 370727f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset + 1, *status); 3708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
3710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* else, we do */ 3712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (*CEOffset != 0) { 3713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos ++) = *CEOffset ++; 3714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (firstOffset >= 0) { 371627f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset + 1, *status); 3717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (firstOffset >= 0) { 3722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetReturn = source->offsetStore - 1; 3723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->offsetBuffer) = firstOffset; 3724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->offsetReturn == source->offsetBuffer) { 3725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetStore = source->offsetBuffer; 3726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetRepeatCount += size - 1; 3729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos - 1; 3732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // in case of one element expansion, we 
3733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // want to immediately return CEpos 3734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->toReturn == source->CEs) { 3735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->CEpos = source->CEs; 3736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->toReturn); 3739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case DIGIT_TAG: 3742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We do a check to see if we want to collate digits as numbers; if so we generate 3745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru a custom collation key. Otherwise we pull out the value stored in the expansion table. 
3746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i; /* general counter */ 3748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->coll->numericCollation == UCOL_ON){ 3750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t digIndx = 0; 3751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t endIndex = 0; 3752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t leadingZeroIndex = 0; 3753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t trailingZeroCount = 0; 3754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t collateVal = 0; 3756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool nonZeroValReached = FALSE; 3758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2]; // I just need a temporary place to store my generated CEs. 3760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We parse the source string until we hit a char that's NOT a digit. 3762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Use this u_charDigitValue. This might be slow because we have to 3763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru handle surrogates... 
3764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3765b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* 3766b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru We need to break up the digit string into collection elements of UCOL_MAX_DIGITS_FOR_NUMBER or less, 3767b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru with any chunks smaller than that being on the right end of the digit string - i.e. the first collation 3768b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru element we process when going backward. To determine how long that chunk might be, we may need to make 3769b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru two passes through the loop that collects digits - one to see how long the string is (and how much is 3770b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru leading zeros) to determine the length of that right-hand chunk, and a second (if the whole string has 3771b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru more than UCOL_MAX_DIGITS_FOR_NUMBER non-leading-zero digits) to actually process that collation 3772b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru element chunk after resetting the state to the initialState at the right side of the digit string. 
3773b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 3774b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t ceLimit = 0; 3775b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar initial_ch = ch; 3776b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collIterateState initialState = {0,0,0,0,0,0,0,0,0}; 3777b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru backupState(source, &initialState); 3778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3779b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(;;) { 3780b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collIterateState state = {0,0,0,0,0,0,0,0,0}; 3781b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 char32 = 0; 3782b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t digVal = 0; 3783b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3784b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U16_IS_TRAIL (ch)) { 3785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!collIter_bos(source)){ 3786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar lead = getPrevNormalizedChar(source, status); 3787b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U16_IS_LEAD(lead)) { 3788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = U16_GET_SUPPLEMENTARY(lead,ch); 3789b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goBackOne(source); 3790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 3791b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = ch; 3792b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } else { 3797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3799b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru digVal = u_charDigitValue(char32); 3800b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3801b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(;;) { 3802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Make sure we have enough space. No longer needed; 3803b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // at this point the largest value of digIndx when we need to save data in numTempBuf 3804b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // is UCOL_MAX_DIGITS_FOR_NUMBER-1 (digIndx is post-incremented) so we just ensure 3805b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // that numTempBuf is big enough (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2). 3806b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3807b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Skip over trailing zeroes, and keep a count of them. 3808b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (digVal != 0) 3809b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nonZeroValReached = TRUE; 3810b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (nonZeroValReached) { 3812b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* 3813b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru We parse the digit string into base 100 numbers (this fits into a byte). 
3814b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru We only add to the buffer in twos, thus if we are parsing an odd character, 3815b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru that serves as the 'tens' digit while the if we are parsing an even one, that 3816b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into 3817b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid 3818b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less 3819b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru than all the other bytes. 3820b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3821b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru Since we're doing in this reverse we want to put the first digit encountered into the 3822b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ones place and the second digit encountered into the tens place. 3823b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 3824b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3825b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((digIndx + trailingZeroCount) % 2 == 1) { 3826b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // High-order digit case (tens place) 3827b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collateVal += (uint8_t)(digVal * 10); 3828b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3829b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // We cannot set leadingZeroIndex unless it has been set for the 3830b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // low-order digit. 
Therefore, all we can do for the high-order 3831b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // digit is turn it off, never on. 3832b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The only time we will have a high digit without a low is for 3833b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // the very first non-zero digit, so no zero check is necessary. 3834b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (collateVal != 0) 3835b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru leadingZeroIndex = 0; 3836b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3837b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The first pass through, digIndx may exceed the limit, but in that case 3838b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // we no longer care about numTempBuf contents since they will be discarded 3839b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) { 3840b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; 3841b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3842b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collateVal = 0; 3843b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 3844b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Low-order digit case (ones place) 3845b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collateVal = (uint8_t)digVal; 3846b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3847b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Check for leading zeroes. 
3848b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (collateVal == 0) { 3849b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!leadingZeroIndex) 3850b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru leadingZeroIndex = (digIndx/2) + 2; 3851b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else 3852b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru leadingZeroIndex = 0; 3853b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3854b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // No need to write to buffer; the case of a last odd digit 3855b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // is handled below. 3856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3857b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++digIndx; 3858b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else 3859b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++trailingZeroCount; 3860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3861b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!collIter_bos(source)) { 3862b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = getPrevNormalizedChar(source, status); 3863b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //goBackOne(source); 3864b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U16_IS_TRAIL(ch)) { 3865b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru backupState(source, &state); 3866b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!collIter_bos(source)) { 3867b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goBackOne(source); 3868b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar lead = getPrevNormalizedChar(source, status); 3869b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3870b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U16_IS_LEAD(lead)) { 
3871b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = U16_GET_SUPPLEMENTARY(lead,ch); 3872b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 3873b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru loadState(source, &state, FALSE); 3874b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = ch; 3875b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3876b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else 3878b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = ch; 3879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3880b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((digVal = u_charDigitValue(char32)) == -1 || (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) { 3881b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (char32 > 0xFFFF) {// For surrogates. 3882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, FALSE); 3883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3884b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Don't need to "reverse" the goBackOne call, 3885b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // as this points to the next position to process.. 3886b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //if (char32 > 0xFFFF) // For surrogates. 
3887b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //getNextNormalizedChar(source); 3888b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 3889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3891b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goBackOne(source); 3892b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }else 3893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3894b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3896b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_NUMBER) { 3897b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // our collation element is not too big, go ahead and finish with it 3898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3899b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3900b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // our digit string is too long for a collation element; 3901b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // set the limit for it, reset the state and begin again 3902b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGITS_FOR_NUMBER; 3903b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( ceLimit == 0 ) { 3904b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER; 3905b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3906b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = initial_ch; 3907b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru loadState(source, &initialState, FALSE); 3908b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru digIndx = endIndex = leadingZeroIndex = trailingZeroCount = 0; 
3909b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collateVal = 0; 3910b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nonZeroValReached = FALSE; 3911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (! nonZeroValReached) { 3914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx = 2; 3915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroCount = 0; 3916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[2] = 6; 3917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((digIndx + trailingZeroCount) % 2 != 0) { 3920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6; 3921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx += 1; // The implicit leading zero 3922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (trailingZeroCount % 2 != 0) { 3924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We had to consume one trailing zero for the low digit 3925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // of the least significant byte 3926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx += 1; // The trailing zero not in the exponent 3927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroCount -= 1; 3928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endIndex = leadingZeroIndex ? 
leadingZeroIndex : ((digIndx/2) + 2) ; 3931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Subtract one off of the last byte. Really the first byte here, but it's reversed... 3933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[2] -= 1; 3934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We want to skip over the first two slots in the buffer. The first slot 3937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the 3938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sign/exponent byte: 0x80 + (decimalPos/2) & 7f. 3939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru The exponent must be adjusted by the number of leading zeroes, and the number of 3940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailing zeroes. 3941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[0] = UCOL_CODAN_PLACEHOLDER; 3943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t exponent = (digIndx+trailingZeroCount)/2; 3944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (leadingZeroIndex) 3945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exponent -= ((digIndx/2) + 2 - leadingZeroIndex); 3946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F)); 3947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Now transfer the collation key to our collIterate struct. 
394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The total size for our collation key is half of endIndex, rounded up. 395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t size = (endIndex+1)/2; 395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!ensureCEsCapacity(source, size)) { 395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_NULLORDER; 395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight 3955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight 3956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_BYTE_COMMON; // Tertiary weight. 3957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = endIndex - 1; // Reset the index into the buffer. 3958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i >= 2) { 3959b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t primWeight = numTempBuf[i--] << 8; 3960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( i >= 2) 3961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primWeight |= numTempBuf[i--]; 3962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER; 3963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos -1; 3966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->toReturn); 3967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE); 
3969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(CEOffset++); 3970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/ 3975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t 3977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7; 3978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t LCount = 19; 3979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t VCount = 21; 3980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t TCount = 28; 3981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t NCount = VCount * TCount; /* 588 */ 3982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t SCount = LCount * NCount; /* 11172 */ 3983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t L = ch - SBase; 3985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru divide into pieces. 
3987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru we do it in this order since some compilers can do % and / in one 3988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru operation 3989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t T = L % TCount; 3991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L /= TCount; 3992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t V = L % VCount; 3993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L /= VCount; 3994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* offset them */ 3996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L += LBase; 3997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru V += VBase; 3998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru T += TBase; 3999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 400027f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t firstOffset = (int32_t)(source->pos - source->string); 400127f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset, *status); 4002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 4004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * return the first CE, but first put the rest into the expansion buffer 4005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 4006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!source->coll->image->jamoSpecial) { 4007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L); 4008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V); 400927f654740f2a26ad62a5c155af9199af9e69b889claireho 
source->appendOffset(firstOffset + 1, *status); 4010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 401127f654740f2a26ad62a5c155af9199af9e69b889claireho if (T != TBase) { 4012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T); 401327f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset + 1, *status); 401427f654740f2a26ad62a5c155af9199af9e69b889claireho } 4015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos - 1; 4017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 401827f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetReturn = source->offsetStore - 1; 401927f654740f2a26ad62a5c155af9199af9e69b889claireho if (source->offsetReturn == source->offsetBuffer) { 402027f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetStore = source->offsetBuffer; 402127f654740f2a26ad62a5c155af9199af9e69b889claireho } 402227f654740f2a26ad62a5c155af9199af9e69b889claireho 402327f654740f2a26ad62a5c155af9199af9e69b889claireho return *(source->toReturn); 4024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Since Hanguls pass the FCD check, it is 4026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // guaranteed that we won't be in 4027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the normalization buffer if something like this happens 4028b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Move Jamos into normalization buffer 403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *tempbuffer = source->writableBuffer.getBuffer(5); 4031b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t tempbufferLength, jamoOffset; 403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
tempbuffer[0] = 0; 403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbuffer[1] = (UChar)L; 403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbuffer[2] = (UChar)V; 4035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (T != TBase) { 403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbuffer[3] = (UChar)T; 403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbufferLength = 4; 4038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbufferLength = 3; 4040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->writableBuffer.releaseBuffer(tempbufferLength); 4042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4043b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Indicate where to continue in main input string after exhausting the writableBuffer 4044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->pos == source->string) { 4045b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho jamoOffset = 0; 4046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->fcdPosition = NULL; 4047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4048b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho jamoOffset = source->pos - source->string; 4049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->fcdPosition = source->pos-1; 4050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Append offsets for the additional chars 4053b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (not the 0, and not the L whose offsets match the original Hangul) 4054b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t jamoRemaining = tempbufferLength - 2; 4055b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho jamoOffset++; // appended offsets 
should match end of original Hangul 4056b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (jamoRemaining-- > 0) { 4057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho source->appendOffset(jamoOffset, *status); 4058b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4060b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho source->offsetRepeatValue = jamoOffset; 4061b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho source->offsetReturn = source->offsetStore - 1; 4063b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (source->offsetReturn == source->offsetBuffer) { 4064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho source->offsetStore = source->offsetBuffer; 4065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->pos = source->writableBuffer.getTerminatedBuffer() + tempbufferLength; 4068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->origFlags = source->flags; 4069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags |= UCOL_ITER_INNORMBUF; 4070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 4071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(UCOL_IGNORABLE); 4073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case IMPLICIT_TAG: /* everything that is not defined otherwise */ 4077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getPrevImplicit(ch, source); 4078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
4079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: Remove CJK implicits as they are handled by the getImplicitPrimary function 4080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/ 4081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getPrevImplicit(ch, source); 4082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SURROGATE_TAG: /* This is a surrogate pair */ 408427f654740f2a26ad62a5c155af9199af9e69b889claireho /* essentially an engaged lead surrogate. */ 4085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* if you have encountered it here, it means that a */ 4086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* broken sequence was encountered and this is an error */ 408727f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 4088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case LEAD_SURROGATE_TAG: /* D800-DBFF*/ 409027f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; /* broken surrogate sequence */ 4091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/ 4093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 4094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 cp = 0; 4095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar prevChar; 409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *prev; 4097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isAtStartPrevIterate(source)) { 4098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we are at the start of the string, wrong place to be at */ 
409927f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 4100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 410150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (source->pos != source->writableBuffer.getBuffer()) { 4102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prev = source->pos - 1; 4103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prev = source->fcdPosition; 4105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prevChar = *prev; 4107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Handles Han and Supplementary characters here.*/ 4109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_LEAD(prevChar)) { 4110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000)); 4111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->pos = prev; 4112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 411327f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; /* like unassigned */ 4114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getPrevImplicit(cp, source); 4117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* UCA is filled with these. 
Tailorings are NOT_FOUND */ 4120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* not yet implemented */ 4121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CHARSET_TAG: /* this tag always returns */ 4122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* probably after 1.8 */ 4123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_NOT_FOUND; 4124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: /* this tag always returns */ 4126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INTERNAL_PROGRAM_ERROR; 4127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE=0; 4128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (CE <= UCOL_NOT_FOUND) { 4132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 4137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This should really be a macro */ 4140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This function is used to reverse parts of a buffer. 
We need this operation when doing continuation */ 4141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* secondaries in French */ 4142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 4143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) { 4144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t temp; 4145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(start<end) { 4146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp = *start; 4147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *start++ = *end; 4148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *end-- = temp; 4149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 4152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define uprv_ucol_reverse_buffer(TYPE, start, end) { \ 4154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TYPE tempA; \ 4155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruwhile((start)<(end)) { \ 4156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempA = *(start); \ 4157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(start)++ = *(end); \ 4158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(end)-- = tempA; \ 4159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} \ 4160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 4163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the sortkey generation functions */ 
4164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 4165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 4166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 4168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Merge two sort keys. 4169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is useful, for example, to combine sort keys from first and last names 4170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to sort such pairs. 4171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Merged sort keys consider on each collation level the first part first entirely, 4172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then the second one. 4173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is possible to merge multiple sort keys by consecutively merging 4174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * another one with the intermediate result. 4175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 4176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The length of the merge result is the sum of the lengths of the input sort keys 4177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * minus 1. 
4178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 4179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src1 the first sort key 4180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src1Length the length of the first sort key, including the zero byte at the end; 4181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * can be -1 if the function is to find the length 4182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src2 the second sort key 4183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src2Length the length of the second sort key, including the zero byte at the end; 4184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * can be -1 if the function is to find the length 4185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest the buffer where the merged sort key is written, 4186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * can be NULL if destCapacity==0 4187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param destCapacity the number of bytes in the dest buffer 4188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the length of the merged sort key, src1Length+src2Length-1; 4189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), 4190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in which cases the contents of dest is undefined 4191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 4192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @draft 4193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 4194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 4195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 
4196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *src2, int32_t src2Length, 4197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dest, int32_t destCapacity) { 4198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destLength; 4199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t b; 4200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check arguments */ 4202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( src1==NULL || src1Length<-2 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) || 4203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src2==NULL || src2Length<-2 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) || 4204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destCapacity<0 || (destCapacity>0 && dest==NULL) 4205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 4206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* error, attempt to write a zero byte and return 0 */ 4207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dest!=NULL && destCapacity>0) { 4208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest=0; 4209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 4211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check lengths and capacity */ 4214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(src1Length<0) { 4215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src1Length=(int32_t)uprv_strlen((const char *)src1)+1; 4216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru if(src2Length<0) { 4218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src2Length=(int32_t)uprv_strlen((const char *)src2)+1; 4219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=src1Length+src2Length-1; 4222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destLength>destCapacity) { 4223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the merged sort key does not fit into the destination */ 4224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destLength; 4225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* merge the sort keys with the same number of levels */ 4228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(*src1!=0 && *src2!=0) { /* while both have another level */ 4229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy level from src1 not including 00 or 01 */ 4230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((b=*src1)>=2) { 4231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++src1; 4232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=b; 4233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* add a 02 merge separator */ 4236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=2; 4237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy level from src2 not including 00 or 01 */ 4239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((b=*src2)>=2) { 
4240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++src2; 4241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=b; 4242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if both sort keys have another level, then add a 01 level separator and continue */ 4245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*src1==1 && *src2==1) { 4246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++src1; 4247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++src2; 4248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=1; 4249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 4253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * here, at least one sort key is finished now, but the other one 4254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * might have some contents left from containing more levels; 4255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that contents is just appended to the result 4256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 4257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*src1!=0) { 4258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* src1 is not finished, therefore *src2==0, and src1 is appended */ 4259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src2=src1; 4260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* append src2, "the other, unfinished sort key" */ 4262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy((char *)dest, (const 
char *)src2); 4263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trust that neither sort key contained illegally embedded zero bytes */ 4265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destLength; 4266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 4269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass SortKeyByteSink : public ByteSink { 4271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic: 4272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const uint32_t FILL_ORIGINAL_BUFFER = 1; 4273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const uint32_t DONT_GROW = 2; 4274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink(char *dest, int32_t destCapacity, uint32_t flags=0) 4275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : ownedBuffer_(NULL), buffer_(dest), capacity_(destCapacity), 4276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho appended_(0), 4277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fill_(flags & FILL_ORIGINAL_BUFFER), 4278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho grow_((flags & DONT_GROW) == 0) { 4279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (buffer_ == NULL || capacity_ < 0) { 4280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buffer_ = reinterpret_cast<char *>(&lastResortByte_); 4281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho capacity_ = 0; 4282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual ~SortKeyByteSink() { uprv_free(ownedBuffer_); } 4285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void Append(const char 
*bytes, int32_t n); 4287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void Append(const uint8_t *bytes, int32_t n) { Append(reinterpret_cast<const char *>(bytes), n); } 4288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void Append(uint8_t b) { 4289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (appended_ < capacity_) { 4290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buffer_[appended_++] = (char)b; 4291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Append(&b, 1); 4293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void Append(uint8_t b1, uint8_t b2) { 4296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t a2 = appended_ + 2; 4297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (a2 <= capacity_) { 4298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buffer_[appended_] = (char)b1; 4299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buffer_[appended_ + 1] = (char)b2; 4300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho appended_ = a2; 4301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char bytes[2] = { (char)b1, (char)b2 }; 4303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Append(bytes, 2); 4304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void Append(const SortKeyByteSink &other) { Append(other.buffer_, other.appended_); } 4307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual char *GetAppendBuffer(int32_t min_capacity, 4308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t desired_capacity_hint, 4309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char *scratch, int32_t scratch_capacity, 4310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t *result_capacity); 
4311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t NumberOfBytesAppended() const { return appended_; } 4312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t &LastByte() { 4313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (buffer_ != NULL && appended_ > 0) { 4314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return reinterpret_cast<uint8_t *>(buffer_)[appended_ - 1]; 4315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return lastResortByte_; 4317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *GetLastFewBytes(int32_t n) { 4320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (buffer_ != NULL && appended_ >= n) { 4321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return reinterpret_cast<uint8_t *>(buffer_) + appended_ - n; 4322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 4324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char *GetBuffer() { return buffer_; } 4327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *GetUnsignedBuffer() { return reinterpret_cast<uint8_t *>(buffer_); } 4328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *OrphanUnsignedBuffer(int32_t &orphanedCapacity); 4329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool IsOk() const { return buffer_ != NULL; } // otherwise out-of-memory 4330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate: 4332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented 4333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink &operator=(const SortKeyByteSink &); // 
assignment operator not implemented 4334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool Resize(int32_t appendCapacity, int32_t length); 4336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void SetNotOk() { 4337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buffer_ = NULL; 4338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho capacity_ = 0; 4339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static uint8_t lastResortByte_; // last-resort return value from LastByte() 4342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char *ownedBuffer_; 4344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char *buffer_; 4345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t capacity_; 4346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t appended_; 4347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool fill_; 4348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool grow_; 4349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 4350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehouint8_t SortKeyByteSink::lastResortByte_ = 0; 4352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid 4354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoSortKeyByteSink::Append(const char *bytes, int32_t n) { 4355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (n <= 0) { 4356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 4357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length = appended_; 4359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho appended_ += n; 4360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((buffer_ + length) == bytes) { 
4361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; // the caller used GetAppendBuffer() and wrote the bytes already 4362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (buffer_ == NULL) { 4364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; // allocation failed before already 4365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t available = capacity_ - length; 4367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (bytes == NULL) { 4368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // assume that the caller failed to allocate memory 4369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (fill_) { 4370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (n > available) { 4371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho n = available; 4372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memset(buffer_, 0, n); 4374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SetNotOk(); // propagate the out-of-memory error 4376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 4377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (n > available) { 4379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (fill_ && available > 0) { 4380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Fill the original buffer completely. 
4381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(buffer_ + length, bytes, available); 4382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes += available; 4383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length += available; 4384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho n -= available; 4385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho available = 0; 4386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fill_ = FALSE; 4388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (!Resize(n, length)) { 4389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SetNotOk(); 4390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 4391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(buffer_ + length, bytes, n); 4394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 4395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehochar * 4397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoSortKeyByteSink::GetAppendBuffer(int32_t min_capacity, 4398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t desired_capacity_hint, 4399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char *scratch, 4400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t scratch_capacity, 4401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t *result_capacity) { 4402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (min_capacity < 1 || scratch_capacity < min_capacity) { 4403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *result_capacity = 0; 4404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 4405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t available = capacity_ - appended_; 4407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (available >= min_capacity) { 
4408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *result_capacity = available; 4409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return buffer_ + appended_; 4410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if (Resize(desired_capacity_hint, appended_)) { 4411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *result_capacity = capacity_ - appended_; 4412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return buffer_ + appended_; 4413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *result_capacity = scratch_capacity; 4415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return scratch; 4416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4417b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 4418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4419b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool 4420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoSortKeyByteSink::Resize(int32_t appendCapacity, int32_t length) { 4421b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (!grow_) { 4422b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 4423b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4424b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newCapacity = 2 * capacity_; 4425b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t altCapacity = length + 2 * appendCapacity; 4426b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (newCapacity < altCapacity) { 4427b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newCapacity = altCapacity; 4428b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4429b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (newCapacity < 1024) { 4430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newCapacity = 1024; 4431b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4432b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char *newBuffer = (char *)uprv_malloc(newCapacity); 4433b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (newBuffer == NULL) 
{ 4434b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 4435b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4436b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(newBuffer, buffer_, length); 4437b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(ownedBuffer_); 4438b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ownedBuffer_ = buffer_ = newBuffer; 4439b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho capacity_ = newCapacity; 4440b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 4441b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 4442b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4443b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehouint8_t * 4444b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoSortKeyByteSink::OrphanUnsignedBuffer(int32_t &orphanedCapacity) { 4445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (buffer_ == NULL || appended_ == 0) { 4446b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho orphanedCapacity = 0; 4447b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 4448b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4449b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (ownedBuffer_ != NULL) { 4450b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // orphan & forget the ownedBuffer_ 4451b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *returnBuffer = reinterpret_cast<uint8_t *>(ownedBuffer_); 4452b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ownedBuffer_ = buffer_ = NULL; 4453b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho orphanedCapacity = capacity_; 4454b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho capacity_ = appended_ = 0; 4455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return returnBuffer; 4456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4457b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // clone the buffer_ 4458b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *newBuffer = (uint8_t *)uprv_malloc(appended_); 
4459b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (newBuffer == NULL) { 4460b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho orphanedCapacity = 0; 4461b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 4462b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4463b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(newBuffer, buffer_, appended_); 4464b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho orphanedCapacity = appended_; 4465b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return newBuffer; 4466b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 4467b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 4469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* sortkey API */ 4471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 4472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getSortKey(const UCollator *coll, 4473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 4474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *result, 4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t resultLength) 4477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 4478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY); 4479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 4480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source, 4481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ((sourceLength==-1 && source!=NULL) ? 
u_strlen(source) : sourceLength)); 4482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t keySize = 0; 4486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source != NULL) { 4488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // source == NULL is actually an error situation, but we would need to 4489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // have an error code to return it. Until we introduce a new 4490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // API, it stays like this 4491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* this uses the function pointer that is set in updateinternalstate */ 4493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* currently, there are two funcs: */ 4494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /*ucol_calcSortKey(...);*/ 4495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /*ucol_calcSortKeySimpleTertiary(...);*/ 4496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4497b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink sink(reinterpret_cast<char *>(result), resultLength, 4498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink::FILL_ORIGINAL_BUFFER | SortKeyByteSink::DONT_GROW); 4499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho coll->sortKeyGen(coll, source, sourceLength, sink, &status); 4500b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho keySize = sink.NumberOfBytesAppended(); 4501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); 4503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(status); 4504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return keySize; 4505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this function is called by the C++ API for sortkey generation */ 4508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 4509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getSortKeyWithAllocation(const UCollator *coll, 4510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, int32_t sourceLength, 4511b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *&result, int32_t &resultCapacity, 4512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 4513b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink sink(reinterpret_cast<char *>(result), resultCapacity); 4514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho coll->sortKeyGen(coll, source, sourceLength, sink, pErrorCode); 4515b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t resultLen = sink.NumberOfBytesAppended(); 4516b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(*pErrorCode)) { 4517b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (!sink.IsOk()) { 4518b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 4519b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if (result != sink.GetUnsignedBuffer()) { 4520b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result = sink.OrphanUnsignedBuffer(resultCapacity); 4521b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4522b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4523b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return resultLen; 
4524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 452627f654740f2a26ad62a5c155af9199af9e69b889claireho// Is this primary weight compressible? 452727f654740f2a26ad62a5c155af9199af9e69b889claireho// Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit). 452827f654740f2a26ad62a5c155af9199af9e69b889claireho// TODO: This should use per-lead-byte flags from FractionalUCA.txt. 452927f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline UBool 453027f654740f2a26ad62a5c155af9199af9e69b889clairehoisCompressible(const UCollator * /*coll*/, uint8_t primary1) { 453127f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_BYTE_FIRST_NON_LATIN_PRIMARY <= primary1 && primary1 <= maxRegularPrimary; 453227f654740f2a26ad62a5c155af9199af9e69b889claireho} 453327f654740f2a26ad62a5c155af9199af9e69b889claireho 4534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 4535b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoinline void doCaseShift(SortKeyByteSink &cases, uint32_t &caseShift) { 4536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (caseShift == 0) { 4537b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho cases.Append(UCOL_CASE_BYTE_START); 4538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift = UCOL_CASE_SHIFT_START; 4539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4542b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Packs the secondary buffer when processing French locale. 
4543b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void 4544b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopackFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) { 4545b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries += secsize; // We read the secondary-level bytes back to front. 4546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t secondary; 4547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t count2 = 0; 4548b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i = 0; 4549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we use i here since the key size already accounts for terminators, so we'll discard the increment 4550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(i = 0; i<secsize; i++) { 4551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secondary = *(secondaries-i-1); 4552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This is compression code. */ 4553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary == UCOL_COMMON2) { 4554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ++count2; 4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 4557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary > UCOL_COMMON2) { // not necessary for 4th level. 
4558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_TOP_COUNT2) { 4559b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2)); 4560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_TOP_COUNT2; 4561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4562b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1))); 4563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 4565b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); 4566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 4567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4568b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); 4569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 = 0; 4571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(secondary); 4573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 4576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 4577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); 4578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 4579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4580b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 
result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); 4581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0 4585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the sortkey work horse function */ 4587b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CFUNC void U_CALLCONV 4588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_calcSortKey(const UCollator *coll, 4589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 4590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 4591b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink &result, 4592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 4593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 4594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 4595b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 4596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4598b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* Stack allocated buffers for buffers we use */ 4599b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER]; 4600b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BUFFER]; 4601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4602b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink &primaries = result; 4603b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink secondaries(second, LENGTHOF(second)); 
4604b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink tertiaries(tert, LENGTHOF(tert)); 4605b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink cases(caseB, LENGTHOF(caseB)); 4606b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink quads(quad, LENGTHOF(quad)); 4607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 460850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString normSource; 4609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength); 4611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColAttributeValue strength = coll->strength; 4613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF); 4615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF); 4616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF); 4617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool compareIdent = (strength == UCOL_IDENTICAL); 4618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doCase = (coll->caseLevel == UCOL_ON); 4619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0); 4620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool shifted = (coll->alternateHandling == UCOL_SHIFTED); 4621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UBool qShifted = shifted && (compareQuad == 0); 4622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && 
(compareQuad == 0); 4623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t variableTopValue = coll->variableTopValue; 4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no 4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // qShifted, we don't need to set UCOL_COMMON_BOT4 so high. 4627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1); 4628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_HIRAGANA_QUAD = 0; 4629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(doHiragana) { 4630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++; 4631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* allocate one more space for hiragana, value for hiragana */ 4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4); 4634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* support for special features like caselevel and funky secondaries */ 4636b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t lastSecondaryLength = 0; 4637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t caseShift = 0; 4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* If we need to normalize, we'll do it all at once at the beginning! 
*/ 464050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *norm2; 4641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(compareIdent) { 464250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2 = Normalizer2Factory::getNFDInstance(*status); 4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(coll->normalizationMode != UCOL_OFF) { 464450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2 = Normalizer2Factory::getFCDInstance(*status); 4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 464650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2 = NULL; 464750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 464850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norm2 != NULL) { 464950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normSource.setTo(FALSE, source, len); 465050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status); 465150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(qcYesLength != len) { 465250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString unnormalized = normSource.tempSubString(qcYesLength); 465350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normSource.truncate(qcYesLength); 465450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2->normalizeSecondAndAppend(normSource, unnormalized, *status); 465550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source = normSource.getBuffer(); 465650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len = normSource.length(); 4657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate s; 466050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, source, len, &s, status); 466150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 4662b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 
466350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 466427f654740f2a26ad62a5c155af9199af9e69b889claireho s.flags &= ~UCOL_ITER_NORM; // source passed the FCD test or else was normalized. 4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t order = 0; 4667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary1 = 0; 4669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary2 = 0; 4670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t secondary = 0; 4671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiary = 0; 4672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseSwitch = coll->caseSwitch; 4673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryMask = coll->tertiaryMask; 4674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int8_t tertiaryAddition = coll->tertiaryAddition; 4675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryTop = coll->tertiaryTop; 4676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryBottom = coll->tertiaryBottom; 4677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryCommon = coll->tertiaryCommon; 4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseBits = 0; 4679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasShifted = FALSE; 4681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool notIsContinuation = FALSE; 4682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t count2 = 0, count3 = 0, count4 = 0; 4684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadPrimary = 0; 
4685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 4687b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = ucol_IGetNextCE(coll, &s, status); 4688b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(order == UCOL_NO_MORE_CES) { 4689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 4690b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4692b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(order == 0) { 4693b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho continue; 4694b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4696b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho notIsContinuation = !isContinuation(order); 4697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4698b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation) { 4699b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK); 4700b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4701b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION)); 4702b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4704b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 4705b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 4706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primary1 = (uint8_t)(order >> 8); 4707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4708b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t originalPrimary1 = primary1; 4709b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation && coll->leadBytePermutationTable != NULL) { 
4710b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primary1 = coll->leadBytePermutationTable[primary1]; 4711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4713b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0) 4714b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho || (!notIsContinuation && wasShifted))) 4715b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho || (wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */ 4716b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho { 4717b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* and other ignorables should be removed if following a shifted code point */ 4718b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */ 4719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* we should just completely ignore it */ 4720b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho continue; 472127f654740f2a26ad62a5c155af9199af9e69b889claireho } 4722b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(compareQuad == 0) { 4723b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(count4 > 0) { 4724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count4 > UCOL_BOT_COUNT4) { 4725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4)); 4726b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count4 -= UCOL_BOT_COUNT4; 4727b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4728b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1))); 4729b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count4 = 0; 4730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4731b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* We are dealing with a variable and we're treating them as shifted */ 
4732b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* This is a shifted ignorable */ 4733b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary1 != 0) { /* we need to check this since we could be in continuation */ 4734b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append(primary1); 4735b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4736b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary2 != 0) { 4737b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append(primary2); 4738b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4739b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4740b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho wasShifted = TRUE; 4741b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4742b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho wasShifted = FALSE; 4743b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */ 4744b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */ 4745b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* regular and simple sortkey calc */ 4746b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary1 != UCOL_IGNORABLE) { 4747b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation) { 4748b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(leadPrimary == primary1) { 4749b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary2); 4750b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(leadPrimary != 0) { 4752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append((uint8_t)((primary1 > leadPrimary) ? 
UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN)); 4753b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4754b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary2 == UCOL_IGNORABLE) { 4755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* one byter, not compressed */ 4756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary1); 4757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho leadPrimary = 0; 4758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(isCompressible(coll, originalPrimary1)) { 4759b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* compress */ 4760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(leadPrimary = primary1, primary2); 4761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4762b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho leadPrimary = 0; 4763b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary1, primary2); 4764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */ 4767b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary2 == UCOL_IGNORABLE) { 4768b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary1); 4769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary1, primary2); 4771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4773b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(secondary > compareSec) { 4776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(!isFrenchSec) { 4777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* This is 
compression code. */ 4778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (secondary == UCOL_COMMON2 && notIsContinuation) { 4779b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++count2; 4780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (count2 > 0) { 4782b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (secondary > UCOL_COMMON2) { // not necessary for 4th level. 4783b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count2 > UCOL_TOP_COUNT2) { 4784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2)); 4785b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count2 -= (uint32_t)UCOL_TOP_COUNT2; 4786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4787b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1))); 478827f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 4789b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count2 > UCOL_BOT_COUNT2) { 4790b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); 4791b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count2 -= (uint32_t)UCOL_BOT_COUNT2; 479227f654740f2a26ad62a5c155af9199af9e69b889claireho } 4793b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); 4794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4795b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count2 = 0; 4796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4797b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append(secondary); 4798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4799b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4800b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* Do the special handling for French secondaries */ 
4801b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* We need to get continuation elements and do intermediate restore */ 4802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* abc1c2c3de with french secondaries need to be edc1c2c3ba NOT edc3c2c1ba */ 4803b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation) { 4804b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (lastSecondaryLength > 1) { 4805b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength); 4806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (frenchStartPtr != NULL) { 4807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* reverse secondaries from frenchStartPtr up to frenchEndPtr */ 4808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1; 4809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr); 4810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4812b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho lastSecondaryLength = 1; 4813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4814b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++lastSecondaryLength; 4815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4816b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append(secondary); 4817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) { 4821b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // do the case level if we need to do it. 
We don't want to calculate 4822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // case level for primary ignorables if we have only primary strength and case level 4823b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // otherwise we would break well formedness of CEs 4824b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho doCaseShift(cases, caseShift); 4825b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation) { 4826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho caseBits = (uint8_t)(tertiary & 0xC0); 4827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(tertiary != 0) { 4829b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(coll->caseFirst == UCOL_UPPER_FIRST) { 4830b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if((caseBits & 0xC0) == 0) { 4831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho cases.LastByte() |= 1 << (--caseShift); 4832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4833b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho cases.LastByte() |= 0 << (--caseShift); 4834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* second bit */ 4835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho doCaseShift(cases, caseShift); 4836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho cases.LastByte() |= ((caseBits>>6)&1) << (--caseShift); 4837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4839b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if((caseBits & 0xC0) == 0) { 4840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho cases.LastByte() |= 0 << (--caseShift); 4841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho cases.LastByte() |= 1 << (--caseShift); 4843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* second bit */ 4844b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho doCaseShift(cases, caseShift); 
4845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho cases.LastByte() |= ((caseBits>>7)&1) << (--caseShift); 4846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4851b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation) { 4852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary ^= caseSwitch; 4853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary &= tertiaryMask; 4857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(tertiary > compareTer) { 4858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* This is compression code. 
*/ 4859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* sequence size check is included in the if clause */ 4860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (tertiary == tertiaryCommon && notIsContinuation) { 4861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++count3; 4862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) { 4864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary += tertiaryAddition; 4865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) { 4866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary -= tertiaryAddition; 4867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (count3 > 0) { 4869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((tertiary > tertiaryCommon)) { 4870b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count3 > coll->tertiaryTopCount) { 4871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount)); 4872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count3 -= (uint32_t)coll->tertiaryTopCount; 4873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 4874b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryTop - (count3-1))); 4875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count3 > coll->tertiaryBottomCount) { 4877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount)); 4878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count3 -= (uint32_t)coll->tertiaryBottomCount; 4879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4880b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 
tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1))); 4881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4882b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count3 = 0; 4883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4884b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append(tertiary); 4885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(/*qShifted*/(compareQuad==0) && notIsContinuation) { 4889b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it 4890b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(count4>0) { // Close this part 4891b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count4 > UCOL_BOT_COUNT4) { 4892b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4)); 4893b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count4 -= UCOL_BOT_COUNT4; 4894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1))); 4896b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count4 = 0; 4897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4898b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append(UCOL_HIRAGANA_QUAD); // Add the Hiragana 4899b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { // This wasn't Hiragana, so we can continue adding stuff 4900b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count4++; 4901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
} 4905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Here, we are generally done with processing */ 4907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* bailing out would not be too productive */ 4908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(*status)) { 4910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we have done all the CE's, now let's put them together to form a key */ 4911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareSec == 0) { 4912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 4913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 4914b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); 4915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 4916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); 4918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4919b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(UCOL_LEVELTERMINATOR); 4920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(!isFrenchSec || !secondaries.IsOk()) { 4921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(secondaries); 4922b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 4923b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If there are any unresolved continuation secondaries, 4924b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // reverse them here so that we can reverse the whole secondary thing. 
4925b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (lastSecondaryLength > 1) { 4926b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength); 4927b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (frenchStartPtr != NULL) { 4928b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* reverse secondaries from frenchStartPtr up to frenchEndPtr */ 4929b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1; 4930b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr); 4931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4933b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho packFrench(secondaries.GetUnsignedBuffer(), secondaries.NumberOfBytesAppended(), result); 4934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(doCase) { 4938b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(UCOL_LEVELTERMINATOR); 4939b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(cases); 4940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareTer == 0) { 4943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count3 > 0) { 4944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (coll->tertiaryCommon != UCOL_COMMON_BOT3) { 4945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 >= coll->tertiaryTopCount) { 4946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryTop - 
coll->tertiaryTopCount)); 4947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryTopCount; 4948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryTop - count3)); 4950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 > coll->tertiaryBottomCount) { 4952b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount)); 4953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryBottomCount; 4954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4955b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1))); 4956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4958b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(UCOL_LEVELTERMINATOR); 4959b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(tertiaries); 4960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareQuad == 0/*qShifted == TRUE*/) { 4962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(count4 > 0) { 4963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count4 > UCOL_BOT_COUNT4) { 4964b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4)); 4965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count4 -= UCOL_BOT_COUNT4; 4966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4967b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1))); 4968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
} 4969b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(UCOL_LEVELTERMINATOR); 4970b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(quads); 4971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareIdent) { 4974b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(UCOL_LEVELTERMINATOR); 4975b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho u_writeIdenticalLevelRun(s.string, len, result); 4976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4978b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(0); 4979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* To avoid memory leak, free the offset buffer if necessary. 
*/ 4982b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_freeOffsetBuffer(&s); 4983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CFUNC void U_CALLCONV 4987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_calcSortKeySimpleTertiary(const UCollator *coll, 4988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 4989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 4990b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink &result, 4991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 4992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 4993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ALIGN_CODE(16); 4994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 4996b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 4997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4999b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* Stack allocated buffers for buffers we use */ 5000b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER]; 5001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5002b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink &primaries = result; 5003b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink secondaries(second, LENGTHOF(second)); 5004b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SortKeyByteSink tertiaries(tert, LENGTHOF(tert)); 5005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
500650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString normSource; 5007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = sourceLength; 5009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* If we need to normalize, we'll do it all at once at the beginning! */ 501150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(coll->normalizationMode != UCOL_OFF) { 501250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normSource.setTo(len < 0, source, len); 501350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *norm2 = Normalizer2Factory::getFCDInstance(*status); 501450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status); 501550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(qcYesLength != normSource.length()) { 501650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString unnormalized = normSource.tempSubString(qcYesLength); 501750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normSource.truncate(qcYesLength); 501850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2->normalizeSecondAndAppend(normSource, unnormalized, *status); 501950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source = normSource.getBuffer(); 502050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len = normSource.length(); 5021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate s; 502450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, (UChar *)source, len, &s, status); 502550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 5026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 502750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
502827f654740f2a26ad62a5c155af9199af9e69b889claireho s.flags &= ~UCOL_ITER_NORM; // source passed the FCD test or else was normalized. 5029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t order = 0; 5031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary1 = 0; 5033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary2 = 0; 5034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t secondary = 0; 5035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiary = 0; 5036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseSwitch = coll->caseSwitch; 5037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryMask = coll->tertiaryMask; 5038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int8_t tertiaryAddition = coll->tertiaryAddition; 5039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryTop = coll->tertiaryTop; 5040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryBottom = coll->tertiaryBottom; 5041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryCommon = coll->tertiaryCommon; 5042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool notIsContinuation = FALSE; 5044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t count2 = 0, count3 = 0; 5046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadPrimary = 0; 5047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 5049b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho order = ucol_IGetNextCE(coll, &s, status); 
5050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(order == 0) { 5052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho continue; 5053b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5055b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(order == UCOL_NO_MORE_CES) { 5056b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 5057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho notIsContinuation = !isContinuation(order); 5060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5061b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation) { 5062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary = (uint8_t)((order & tertiaryMask)); 5063b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 5064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION)); 5065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 506627f654740f2a26ad62a5c155af9199af9e69b889claireho 5067b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 5068b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 5069b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primary1 = (uint8_t)(order >> 8); 5070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5071b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t originalPrimary1 = primary1; 5072b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->leadBytePermutationTable != NULL && notIsContinuation) { 5073b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primary1 = coll->leadBytePermutationTable[primary1]; 5074b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 507527f654740f2a26ad62a5c155af9199af9e69b889claireho 
5076b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */ 5077b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */ 5078b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */ 5079b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* regular and simple sortkey calc */ 5080b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary1 != UCOL_IGNORABLE) { 5081b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation) { 5082b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(leadPrimary == primary1) { 5083b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary2); 5084b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 5085b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(leadPrimary != 0) { 5086b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append((uint8_t)((primary1 > leadPrimary) ? 
UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN)); 5087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5088b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary2 == UCOL_IGNORABLE) { 5089b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* one byter, not compressed */ 5090b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary1); 5091b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho leadPrimary = 0; 5092b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(isCompressible(coll, originalPrimary1)) { 5093b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* compress */ 5094b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(leadPrimary = primary1, primary2); 5095b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 5096b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho leadPrimary = 0; 5097b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary1, primary2); 5098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */ 5101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(primary2 == UCOL_IGNORABLE) { 5102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary1); 5103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho primaries.Append(primary1, primary2); 5105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(secondary > 0) { /* I think that != 0 test should be != IGNORABLE */ 5110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* 
This is compression code. */ 5111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (secondary == UCOL_COMMON2 && notIsContinuation) { 5112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++count2; 5113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 5114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (count2 > 0) { 5115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (secondary > UCOL_COMMON2) { // not necessary for 4th level. 5116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count2 > UCOL_TOP_COUNT2) { 5117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2)); 5118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count2 -= (uint32_t)UCOL_TOP_COUNT2; 5119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1))); 5121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 5122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count2 > UCOL_BOT_COUNT2) { 5123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); 5124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count2 -= (uint32_t)UCOL_BOT_COUNT2; 5125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); 5127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count2 = 0; 5129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append(secondary); 5131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(notIsContinuation) 
{ 5135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary ^= caseSwitch; 5136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(tertiary > 0) { 5139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* This is compression code. */ 5140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* sequence size check is included in the if clause */ 5141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (tertiary == tertiaryCommon && notIsContinuation) { 5142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++count3; 5143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 5144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) { 5145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary += tertiaryAddition; 5146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) { 5147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiary -= tertiaryAddition; 5148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (count3 > 0) { 5150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((tertiary > tertiaryCommon)) { 5151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count3 > coll->tertiaryTopCount) { 5152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount)); 5153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count3 -= (uint32_t)coll->tertiaryTopCount; 5154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryTop - (count3-1))); 5156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (count3 > coll->tertiaryBottomCount) { 
5158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount)); 5159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count3 -= (uint32_t)coll->tertiaryBottomCount; 5160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1))); 5162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho count3 = 0; 5164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append(tertiary); 5166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we have done all the CE's, now let's put them together to form a key */ 5172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 5173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 5174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); 5175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 5176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); 5178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(UCOL_LEVELTERMINATOR); 5180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(secondaries); 
5181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count3 > 0) { 5183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) { 5184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 >= coll->tertiaryTopCount) { 5185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount)); 5186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryTopCount; 5187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryTop - count3)); 5189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 > coll->tertiaryBottomCount) { 5191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount)); 5192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryBottomCount; 5193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1))); 5195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(UCOL_LEVELTERMINATOR); 5198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(tertiaries); 5199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.Append(0); 5201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 
To avoid memory leak, free the offset buffer if necessary. */ 5204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_freeOffsetBuffer(&s); 5205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(*status) && !result.IsOk()) { 5207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *status = U_BUFFER_OVERFLOW_ERROR; 5208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 5210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline 5212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) { 5213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool notIsContinuation = !isContinuation(CE); 5214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF); 521527f654740f2a26ad62a5c155af9199af9e69b889claireho if((LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0) 521627f654740f2a26ad62a5c155af9199af9e69b889claireho || (!notIsContinuation && *wasShifted))) 5217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (*wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */ 5218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 5219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The stuff below should probably be in the sortkey code... maybe not... 
5220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary1 != 0) { /* if we were shifted and we got an ignorable code point */ 5221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we should just completely ignore it */ 5222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *wasShifted = TRUE; 5223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //continue; 5224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //*wasShifted = TRUE; 5226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 5227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *wasShifted = FALSE; 5229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 5230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 5232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline 5233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *dest) { 5234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(level < maxLevel) { 5235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = UCOL_LEVELTERMINATOR; 5236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = 0; 5238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 5240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** enumeration of level identifiers for partial sort key generation */ 5242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 
5243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_PRIMARY = 0, 5244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_SECONDARY = 1, 5245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_CASE = 2, 5246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_TERTIARY = 3, 5247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_QUATERNARY = 4, 5248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_QUIN = 5, /** This is an extra level, not used - but we have three bits to blow */ 5249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_IDENTICAL = 6, 5250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_NULL = 7, /** level for the end of sort key. Will just produce zeros */ 5251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_LIMIT 5252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 5253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** collation state enum. *_SHIFT value is how much to shift right 5255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to get the state piece to the right. *_MASK value should be 5256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ANDed with the shifted state. This data is stored in state[1] 5257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * field. 5258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 5260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_LEVEL_SHIFT = 0, /** level identificator. 
stores an enum value from above */ 5261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_LEVEL_MASK = 7, /** three bits */ 5262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3, /** number of bytes of primary or quaternary already written */ 5263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1, 5264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** can be only 0 or 1, since we get up to two bytes from primary or quaternary 5265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This field is also used to denote that the French secondary level is finished 5266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_WAS_SHIFTED_SHIFT = 4,/** was the last value shifted */ 5268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_WAS_SHIFTED_MASK = 1, /** can be 0 or 1 (Boolean) */ 5269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_USED_FRENCH_SHIFT = 5,/** how many French bytes have we already written */ 5270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_USED_FRENCH_MASK = 3, /** up to 4 bytes. See comment just below */ 5271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** When we do French we need to reverse secondary values. However, continuations 5272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * need to stay the same. 
So if you had abc1c2c3de, you need to have edc1c2c3ba 5273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_BOCSU_BYTES_SHIFT = 7, 5275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_BOCSU_BYTES_MASK = 3, 5276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_CONSUMED_CES_SHIFT = 9, 5277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF 5278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 5279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// macro calculating the number of expansion CEs available 5281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define uprv_numAvailableExpCEs(s) (s).CEpos - (s).toReturn 5282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** main sortkey part procedure. On the first call, 5285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * you should pass in a collator, an iterator, empty state 5286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * state[0] == state[1] == 0, a buffer to hold results 5287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of bytes you need and an error code pointer. 5288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make sure your buffer is big enough to hold the wanted 5289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of sortkey bytes. I don't check. 
5290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The only meaningful status you can get back is 5291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U_BUFFER_OVERFLOW_ERROR, which basically means that you 5292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have been dealt a raw deal and that you probably won't 5293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * be able to use partial sortkey generation for this 5294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * particular combination of string and collator. This 5295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is highly unlikely, but you should still check the error code. 5296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any other status means that you're not in a sane situation 5297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * anymore. After the first call, preserve state values and 5298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * use them on subsequent calls to obtain more bytes of a sortkey. 5299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Use until the number of bytes written is smaller than the requested 5300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of bytes. Generated sortkey is not compatible with the 5301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * one generated by ucol_getSortKey, as we don't do any compression. 5302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * However, levels are still terminated by a 1 (one) and the sortkey 5303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is terminated by a 0 (zero). Identical level is the same as in the 5304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * regular sortkey - internal bocu-1 implementation is used. 
5305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For curious, although you cannot do much about this, here is 5306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the structure of state words. 5307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * state[0] - iterator state. Depends on the iterator implementation, 5308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * but allows the iterator to continue where it stopped in 5309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last iteration. 5310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * state[1] - collation processing state. Here is the distribution 5311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the bits: 5312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0, 1, 2 - level of the sortkey - primary, secondary, case, tertiary 5313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * quaternary, quin (we don't use this one), identical and 5314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * null (producing only zeroes - first one to terminate the 5315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * sortkey and subsequent to fill the buffer). 5316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 3 - byte count. Number of bytes written on the primary level. 5317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 4 - was shifted. Whether the previous iteration finished in the 5318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * shifted state. 5319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 5, 6 - French continuation bytes written. See the comment in the enum 5320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 7,8 - Bocsu bytes used. Number of bytes from a bocu sequence on 5321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the identical level. 
5322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 9..31 - CEs consumed. Number of getCE or next32 operations performed 5323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * since thes last successful update of the iterator state. 5324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 5326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_nextSortKeyPart(const UCollator *coll, 5327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharIterator *iter, 5328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t state[2], 5329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dest, int32_t count, 5330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) 5331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 5332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* error checking */ 5333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(status==NULL || U_FAILURE(*status)) { 5334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 5335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART); 5337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( coll==NULL || iter==NULL || 5338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state==NULL || 5339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count<0 || (count>0 && dest==NULL) 5340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 5341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status=U_ILLEGAL_ARGUMENT_ERROR; 5342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_STATUS(status); 5343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 
5344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d", 5347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll, iter, state[0], state[1], dest, count); 5348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count==0) { 5350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* nothing to do */ 5351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_VALUE(0); 5352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 5353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** Setting up situation according to the state we got from the previous iteration */ 5355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The state of the iterator from the previous invocation 5356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t iterState = state[0]; 5357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Has the last iteration ended in the shifted state 5358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_SHIFTED_MASK)?TRUE:FALSE; 5359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // What is the current level of the sortkey? 5360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK; 5361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Have we written only one byte from a two byte primary in the previous iteration? 
5362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Also on secondary level - have we finished with the French secondary? 5363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK; 5364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // number of bytes in the continuation buffer for French 5365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK; 5366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Number of bytes already written from a bocsu sequence. Since 5367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the longes bocsu sequence is 4 long, this can be up to 3. 5368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK_BOCSU_BYTES_MASK; 5369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Number of elements that need to be consumed in this iteration because 5370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the iterator returned UITER_NO_STATE at the end of the last iteration, 5371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so we had to save the last valid state. 5372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED_CES_MASK; 5373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** values that depend on the collator attributes */ 5375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // strength of the collator. 
5376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status); 5377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // maximal level of the partial sortkey. Need to take whether case level is done 5378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxLevel = 0; 5379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strength < UCOL_TERTIARY) { 5380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) { 5381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = UCOL_PSK_CASE; 5382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = strength; 5384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strength == UCOL_TERTIARY) { 5387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = UCOL_PSK_TERTIARY; 5388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(strength == UCOL_QUATERNARY) { 5389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = UCOL_PSK_QUATERNARY; 5390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // identical 5391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = UCOL_IDENTICAL; 5392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // value for the quaternary level if Hiragana is encountered. 
Used for JIS X 4061 collation 5395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_HIRAGANA_QUAD = 5396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF; 5397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Boundary value that decides whether a CE is shifted or not 5398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0; 5399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Are we doing French collation? 5400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON); 5401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** initializing the collation state */ 5403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool notIsContinuation = FALSE; 5404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t CE = UCOL_NO_MORE_CES; 5405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate s; 540750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, NULL, -1, &s, status); 540850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 540950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTRACE_EXIT_STATUS(*status); 541050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 541150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.iterator = iter; 5413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.flags |= UCOL_USE_ITERATOR; 5414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This variable tells us whether we have produced some other levels in 
this iteration 5415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // before we moved to the identical level. In that case, we need to switch the 5416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // type of the iterator. 5417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doingIdenticalFromStart = FALSE; 5418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Normalizing iterator 5419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The division for the array length may truncate the array size to 5420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high 5421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for all platforms anyway. 5422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 5423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormIterator *normIter = NULL; 5424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the normalization is turned on for the collator and we are below identical level 5425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we will use a FCD normalizing iterator 5426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && level < UCOL_PSK_IDENTICAL) { 5427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status); 5428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status); 5429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.flags &= ~UCOL_ITER_NORM; 5430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 5431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 
5432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(level == UCOL_PSK_IDENTICAL) { 5435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // for identical level, we need a NFD iterator. We need to instantiate it here, since we 5436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // will be updating the state - and this cannot be done on an ordinary iterator. 5437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status); 5438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); 5439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.flags &= ~UCOL_ITER_NORM; 5440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 5441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 5442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru doingIdenticalFromStart = TRUE; 5445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is the tentative new state of the iterator. The problem 5448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // is that the iterator might return an undefined state, in 5449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which case we should save the last valid state and increase 5450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the iterator skip value. 
5451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newState = 0; 5452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First, we set the iterator to the last valid position 5454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // from the last iteration. This was saved in state[0]. 5455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(iterState == 0) { 5456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* initial state */ 5457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone) { 5458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_LIMIT); 5459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 5461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset to previous state */ 5464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->setState(s.iterator, iterState, status); 5465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 5466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 5467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5468c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This 
variable tells us whether we can attempt to update the state 5474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // of iterator. Situations where we don't want to update iterator state 5475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are the existence of expansion CEs that are not yet processed, and 5476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // finishing the case level without enough space in the buffer to insert 5477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a level terminator. 5478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool canUpdateState = TRUE; 5479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Consume all the CEs that were consumed at the end of the previous 5481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // iteration without updating the iterator state. On identical level, 5482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // consume the code points. 5483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t counter = cces; 5484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level < UCOL_PSK_IDENTICAL) { 5485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(counter-->0) { 5486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If we're doing French and we are on the secondary level, 5487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we go backwards. 
5488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(level == UCOL_PSK_SECONDARY && doingFrench) { 5489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetPrevCE(coll, &s, status); 5490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 5492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* should not happen */ 5495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status=U_INTERNAL_PROGRAM_ERROR; 5496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 5497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 5500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(counter-->0) { 5505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uiter_next32(s.iterator); 5506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // French secondary needs to know whether the iterator state of zero came from previous level OR 5510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // from a new invocation... 
5511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasDoingPrimary = FALSE; 5512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // destination buffer byte counter. When this guy 5513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // gets to count, we're done with the iteration 5514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0; 5515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // used to count the zero bytes written after we 5516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // have finished with the sort key 5517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t j = 0; 5518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Hm.... I think we're ready to plunge in. Basic story is as following: 5521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we have a fall through case based on level. This is used for initial 5522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // positioning on iteration start. Every level processor contains a 5523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for(;;) which will be broken when we exhaust all the CEs. Other 5524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // way to exit is a goto saveState, which happens when we have filled 5525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // out our buffer. 
5526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(level) { 5527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_PRIMARY: 5528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru wasDoingPrimary = TRUE; 5529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 5530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i==count) { 5531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We should save the state only if we 5534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are sure that we are done with the 5535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // previous iterator state 5536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState && byteCountOrFrenchDone == 0) { 5537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 5538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 5539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 5540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 5544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cces++; 5545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Add the level separator 5547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 5548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
byteCountOrFrenchDone=0; 5549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Restart the iteration an move to the 5550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // second level 5551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 5552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cces = 0; 5553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_SECONDARY; 5554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 555627f654740f2a26ad62a5c155af9199af9e69b889claireho if(!isContinuation(CE)){ 555727f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->leadBytePermutationTable != NULL){ 555827f654740f2a26ad62a5c155af9199af9e69b889claireho CE = (coll->leadBytePermutationTable[CE>>24] << 24) | (CE & 0x00FFFFFF); 555927f654740f2a26ad62a5c155af9199af9e69b889claireho } 556027f654740f2a26ad62a5c155af9199af9e69b889claireho } 5561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isShiftedCE(CE, LVT, &wasShifted)) { 5562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */ 5563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 5564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(byteCountOrFrenchDone == 0) { 5565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // get the second byte of primary 5566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)(CE >> 8); 5567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 5569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((CE &=0xff)!=0) { 5571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(i==count) { 5572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* overflow */ 5573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 1; 5574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces--; 5575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 5578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 5582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 5585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* fall through to next level */ 5588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_PSK_SECONDARY: 5589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_SECONDARY) { 5590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!doingFrench) { 5591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 5592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 5593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We should save the state only if we 5596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
// are sure that we are done with the 5597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state 5598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 5599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 5600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 5601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 5602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 5606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 5607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 5609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 5610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 5611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration an move to the 5612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // second level 5613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 5614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_CASE; 5616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isShiftedCE(CE, LVT, &wasShifted)) { 
5619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= 8; /* get secondary */ 5620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 5621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 5622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 5625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 5628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // French secondary processing 5631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t frenchBuff[UCOL_MAX_BUFFER]; 5632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t frenchIndex = 0; 5633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Here we are going backwards. 5634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If the iterator is at the beggining, it should be 5635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // moved to end. 
5636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(wasDoingPrimary) { 5637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_LIMIT); 5638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 5641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 5642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 5645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 5646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 5647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 5648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetPrevCE(coll, &s, status); 5652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 5653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 5655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 5656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 5657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration an move to the next level 5658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, 
UITER_START); 5659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_CASE; 5660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(CE)) { // if it's a continuation, we want to save it and 5663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // reverse when we get a first non-continuation CE. 5664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= 8; 5665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchBuff[frenchIndex++] = (uint8_t)CE; 5666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(!isShiftedCE(CE, LVT, &wasShifted)) { 5667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= 8; /* get secondary */ 5668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!frenchIndex) { 5669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 5670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 5671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchBuff[frenchIndex++] = (uint8_t)CE; 5674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchIndex -= usedFrench; 5675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru usedFrench = 0; 5676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i < count && frenchIndex) { 5677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = frenchBuff[--frenchIndex]; 5678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru usedFrench++; 5679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
5681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 5683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 5686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_CASE; 5691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 5693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_CASE: 5694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) { 5695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t caseShift = UCOL_CASE_SHIFT_START; 5696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t caseByte = UCOL_CASE_BYTE_START; 5697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t caseBits = 0; 5698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 570050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(caseShift <= UCOL_CASE_SHIFT_START); 5701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 5702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We should save the 
state only if we 5705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are sure that we are done with the 5706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state 5707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 5708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 5709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 5710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 5711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 5715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 5716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // On the case level we might have an unfinished 5718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // case byte. Add one if it's started. 5719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(caseShift != UCOL_CASE_SHIFT_START) { 5720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = caseByte; 5721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have finished processing CEs on this level. 5724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // However, we don't know if we have enough space 5725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // to add a case level terminator. 
5726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i < count) { 5727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 5728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 5729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration and move to the 5730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // next level 5731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 5732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_TERTIARY; 5733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isShiftedCE(CE, LVT, &wasShifted)) { 5740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 || strength > UCOL_PRIMARY)) { 5741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // do the case level if we need to do it. 
We don't want to calculate 5742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // case level for primary ignorables if we have only primary strength and case level 5743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise we would break well formedness of CEs 5744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK); 5745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseBits = (uint8_t)(CE & 0xC0); 5746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // this copies the case level logic from the 5747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // sort key generation code 5748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 574950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (caseShift == 0) { 575050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i++] = caseByte; 575150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho caseShift = UCOL_CASE_SHIFT_START; 575250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho caseByte = UCOL_CASE_BYTE_START; 575350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseFirst == UCOL_UPPER_FIRST) { 5755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((caseBits & 0xC0) == 0) { 5756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= 1 << (--caseShift); 5757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= 0 << (--caseShift); 5759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* second bit */ 5760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(caseShift == 0) { 5761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = caseByte; 5762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift = UCOL_CASE_SHIFT_START; 
5763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte = UCOL_CASE_BYTE_START; 5764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= ((caseBits>>6)&1) << (--caseShift); 5766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((caseBits & 0xC0) == 0) { 5769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= 0 << (--caseShift); 5770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= 1 << (--caseShift); 5772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* second bit */ 5773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(caseShift == 0) { 5774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = caseByte; 5775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift = UCOL_CASE_SHIFT_START; 5776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte = UCOL_CASE_BYTE_START; 5777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= ((caseBits>>7)&1) << (--caseShift); 5779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Not sure this is correct for the case level - revisit 5786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(uprv_numAvailableExpCEs(s)) { 5787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 5790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_TERTIARY; 5794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 5796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_TERTIARY: 5797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_TERTIARY) { 5798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 5799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 5800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We should save the state only if we 5803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are sure that we are done with the 5804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state 5805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 5806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 5807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 5808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 5809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 
5810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 5813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 5814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 5816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 5817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 5818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration an move to the 5819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // second level 5820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 5821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_QUATERNARY; 5823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isShiftedCE(CE, LVT, &wasShifted)) { 5826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notIsContinuation = !isContinuation(CE); 5827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 5829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK); 5830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE ^= coll->caseSwitch; 5831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE &= coll->tertiaryMask; 5832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
else { 5833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION)); 5834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 5837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 5838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 5841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 5844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if we're not doing tertiary 5848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // skip to the end 5849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_NULL; 5850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 5852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_QUATERNARY: 5853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_QUATERNARY) { 5854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 5855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 5856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 
5857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We should save the state only if we 5859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are sure that we are done with the 5860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state 5861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 5862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 5863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 5864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 5865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 5869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 5870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 5872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 5873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //dest[i++] = UCOL_LEVELTERMINATOR; 5874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 5875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration an move to the 5876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // second level 5877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 5878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 
5879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_QUIN; 5880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==0) 5883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 5884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isShiftedCE(CE, LVT, &wasShifted)) { 5885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= 16; /* get primary */ 5886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 5887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(byteCountOrFrenchDone == 0) { 5888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)(CE >> 8); 5889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 5891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((CE &=0xff)!=0) { 5893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i==count) { 5894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* overflow */ 5895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 1; 5896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 5899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notIsContinuation = !isContinuation(CE); 
5903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 5904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it 5905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = UCOL_HIRAGANA_QUAD; 5906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = 0xFF; 5908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 5912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 5915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if we're not doing quaternary 5919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // skip to the end 5920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_NULL; 5921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 5923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_QUIN: 5924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_IDENTICAL; 5925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 5926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
case UCOL_PSK_IDENTICAL: 5927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_IDENTICAL) { 5928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 first, second; 5929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t bocsuBytesWritten = 0; 5930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We always need to do identical on 5931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the NFD form of the string. 5932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(normIter == NULL) { 5933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we arrived from the level below and 5934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // normalization was not turned on. 5935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // therefore, we need to make a fresh NFD iterator 5936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status); 5937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); 5938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(!doingIdenticalFromStart) { 5939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // there is an iterator, but we did some other levels. 5940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // therefore, we have a FCD iterator - need to make 5941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a NFD one. 
5942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // normIter being at the beginning does not guarantee 5943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that the underlying iterator is at the beginning 5944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iter->move(iter, 0, UITER_START); 5945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); 5946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // At this point we have a NFD iterator that is positioned 5948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // in the right place 5949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 5950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 5951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru first = uiter_previous32(s.iterator); 5954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // maybe we're at the start of the string 5955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(first == U_SENTINEL) { 5956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru first = 0; 5957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uiter_next32(s.iterator); 5959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru j = 0; 5962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 5963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 
5964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(j+1 < bocsuBytesWritten) { 5965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bocsuBytesUsed = j+1; 5966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // On identical level, we will always save 5971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the state if we reach this point, since 5972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we don't depend on getNextCE for content 5973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // all the content is in our buffer and we 5974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // already either stored the full buffer OR 5975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise we won't arrive here. 
5976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 5977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 5978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 5979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t buff[4]; 5983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru second = uiter_next32(s.iterator); 5984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 5985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // end condition for identical level 5987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(second == U_SENTINEL) { 5988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 5989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_NULL; 5990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, second, buff); 5993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru first = second; 5994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru j = 0; 5996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(bocsuBytesUsed != 0) { 5997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(bocsuBytesUsed-->0) { 5998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru j++; 5999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru } 6000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i < count && j < bocsuBytesWritten) { 6003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = buff[j++]; 6004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_NULL; 6009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 6011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_NULL: 6012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru j = i; 6013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(j<count) { 6014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[j++]=0; 6015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 6018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INTERNAL_PROGRAM_ERROR; 6019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 6020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusaveState: 6024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now we need to return stuff. 
First we want to see whether we have 6025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // done everything for the current state of iterator. 6026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(byteCountOrFrenchDone 6027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || canUpdateState == FALSE 6028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE) 6029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Any of above mean that the previous transaction 6031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // wasn't finished and that we should store the 6032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state. 6033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru state[0] = iterState; 6034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The transaction is complete. We will continue in the next iteration. 6036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[0] = s.iterator->getState(s.iterator); 6037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cces = 0; 6038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Store the number of bocsu bytes written. 
6040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) { 6041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 6042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BYTES_SHIFT; 6044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Next we put in the level of comparison 6046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT); 6047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we are doing French, we need to store whether we have just finished the French level 6049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level == UCOL_PSK_SECONDARY && doingFrench) { 6050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru state[1] |= (((state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT); 6051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru state[1] |= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT); 6053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Was the latest CE shifted 6056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(wasShifted) { 6057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru state[1] |= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT; 
6058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check for cces overflow 6060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) { 6061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 6062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Store cces 6064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[1] |= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SHIFT); 6065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check for French overflow 6067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) { 6068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 6069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Store number of bytes written in the French secondary continuation sequence 6071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[1] |= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENCH_SHIFT); 6072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we have used normalizing iterator, get rid of it 6075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(normIter != NULL) { 6076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(normIter); 6077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
6079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* To avoid memory leak, free the offset buffer if necessary. */ 6080b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_freeOffsetBuffer(&s); 6081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Return number of meaningful sortkey bytes. 6083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d", 6084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest,i, state[0], state[1]); 6085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_VALUE(i); 6086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return i; 6087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 6090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Produce a bound for a given sortkey and a number of levels. 
6091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 6092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 6093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getBound(const uint8_t *source, 6094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 6095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColBoundMode boundType, 6096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t noOfLevels, 6097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *result, 6098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t resultLength, 6099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) 6100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 6101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // consistency checks 6102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(status == NULL || U_FAILURE(*status)) { 6103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source == NULL) { 6106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t sourceIndex = 0; 6111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Scan the string until we skip enough of the key OR reach the end of the key 6112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 6113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sourceIndex++; 6114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(source[sourceIndex] == UCOL_LEVELTERMINATOR) { 6115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru noOfLevels--; 6116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while (noOfLevels > 0 6118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru && (source[sourceIndex] != 0 || sourceIndex < sourceLength)); 6119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((source[sourceIndex] == 0 || sourceIndex == sourceLength) 6121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru && noOfLevels > 0) { 6122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_SORT_KEY_TOO_SHORT_WARNING; 6123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // READ ME: this code assumes that the values for boundType 6127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // enum will not changes. They are set so that the enum value 6128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // corresponds to the number of extra bytes each bound type 6129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // needs. 6130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(result != NULL && resultLength >= sourceIndex+boundType) { 6131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(result, source, sourceIndex); 6132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch(boundType) { 6133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Lower bound just gets terminated. 
No extra bytes 6134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_BOUND_LOWER: // = 0 6135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Upper bound needs one extra byte 6137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_BOUND_UPPER: // = 1 6138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result[sourceIndex++] = 2; 6139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Upper long bound needs two extra bytes 6141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_BOUND_UPPER_LONG: // = 2 6142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result[sourceIndex++] = 0xFF; 6143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result[sourceIndex++] = 0xFF; 6144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: 6146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result[sourceIndex++] = 0; 6150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return sourceIndex; 6152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return sourceIndex+boundType+1; 6154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru/****************************************************************************/ 6158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the functions that deal with the properties of a collator */ 6159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there are new APIs and some compatibility APIs */ 6160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 6161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void 6163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE, 6164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t *primShift, int32_t *secShift, int32_t *terShift) 6165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 6166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0; 6167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool reverseSecondary = FALSE; 616827f654740f2a26ad62a5c155af9199af9e69b889claireho UBool continuation = isContinuation(CE); 616927f654740f2a26ad62a5c155af9199af9e69b889claireho if(!continuation) { 6170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary = (uint8_t)((CE & coll->tertiaryMask)); 6171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary ^= coll->caseSwitch; 6172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru reverseSecondary = TRUE; 6173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION)); 6175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary &= UCOL_REMOVE_CASE; 6176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru reverseSecondary = 
FALSE; 6177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secondary = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK); 6180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primary2 = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK); 6181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primary1 = (uint8_t)(CE >> 8); 6182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary1 != 0) { 618427f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->leadBytePermutationTable != NULL && !continuation) { 618527f654740f2a26ad62a5c155af9199af9e69b889claireho primary1 = coll->leadBytePermutationTable[primary1]; 618627f654740f2a26ad62a5c155af9199af9e69b889claireho } 618727f654740f2a26ad62a5c155af9199af9e69b889claireho 6188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] |= (primary1 << *primShift); 6189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primShift -= 8; 6190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 != 0) { 6192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(*primShift < 0) { 6193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE; 6194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; 6195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; 6196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 6197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] |= (primary2 << 
*primShift); 6199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primShift -= 8; 6200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secondary != 0) { 6202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse secondary 6203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space for secondary 6204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << 24); 6205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // normal case 6206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << *secShift); 6207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secShift -= 8; 6209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tertiary != 0) { 6211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+ch] |= (tertiary << *terShift); 6212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *terShift -= 8; 6213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool 6217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_resizeLatinOneTable(UCollator *coll, int32_t size, UErrorCode *status) { 6218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *newTable = (uint32_t *)uprv_malloc(size*sizeof(uint32_t)*3); 
6219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(newTable == NULL) { 6220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 6221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->latinOneFailed = TRUE; 6222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 6223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sizeToCopy = ((size<coll->latinOneTableLen)?size:coll->latinOneTableLen)*sizeof(uint32_t); 6225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(newTable, 0, size*sizeof(uint32_t)*3); 6226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(newTable, coll->latinOneCEs, sizeToCopy); 6227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(newTable+size, coll->latinOneCEs+coll->latinOneTableLen, sizeToCopy); 6228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(newTable+2*size, coll->latinOneCEs+2*coll->latinOneTableLen, sizeToCopy); 6229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->latinOneTableLen = size; 6230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->latinOneCEs); 6231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->latinOneCEs = newTable; 6232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 6233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 6236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setUpLatinOne(UCollator *coll, UErrorCode *status) { 6237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool result = TRUE; 6238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->latinOneCEs == NULL) { 
6239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*UCOL_LATINONETABLELEN*3); 6240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->latinOneCEs == NULL) { 6241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 6242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 6243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneTableLen = UCOL_LATINONETABLELEN; 6245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar ch = 0; 6247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationElements *it = ucol_openElements(coll, &ch, 1, status); 6248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for null pointer 6249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(*status)) { 6250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 6251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)*coll->latinOneTableLen*3); 6253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t primShift = 24, secShift = 24, terShift = 24; 6255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t CE = 0; 6256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1; 6257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: make safe if you get more than you wanted... 
6259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) { 6260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primShift = 24; secShift = 24; terShift = 24; 6261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ch < 0x100) { 6262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = coll->latinOneMapping[ch]; 6263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 6265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE == UCOL_NOT_FOUND && coll->UCA) { 6266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); 6267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE < UCOL_NOT_FOUND) { 6270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift); 6271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch (getCETag(CE)) { 6273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case EXPANSION_TAG: 6274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case DIGIT_TAG: 6275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setText(it, &ch, 1, status); 6276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) { 6277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(primShift < 0 || secShift < 0 || terShift < 0) { 6278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE; 6279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; 6280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; 6281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift); 6284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CONTRACTION_TAG: 6287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // here is the trick 6288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // F2 is contraction. We do something very similar to contractions 6289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // but have two indices, one in the real contraction table and the 6290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // other to where we stuffed things. This hopes that we don't have 6291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // many contractions (this should work for latin-1 tables). 
6292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((CE & 0x00FFF000) != 0) { 6294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_UNSUPPORTED_ERROR; 6295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE); 6299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table 6301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] = CE; 6303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] = CE; 6304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE; 6305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We're going to jump into contraction table, pick the elements 6307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // and use them 6308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 6309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 6310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 6311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) { 6312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t size; 
6313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i; /* general counter */ 6314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */ 6315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru size = getExpansionCount(CE); 6316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //CE = *CEOffset++; 6317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */ 6318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 0; i<size; i++) { 6319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primShift < 0 || secShift < 0 || terShift < 0) { 6320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift); 6326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* else, we do */ 6328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(*CEOffset != 0) { 6329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primShift < 0 || secShift < 0 || terShift < 0) { 6330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift); 6336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru contractionOffset++; 6339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(CE < UCOL_NOT_FOUND) { 6340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, (UChar)contractionOffset++, CE, &primShift, &secShift, &terShift); 6341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru contractionOffset++; 6346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset++; 6348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primShift = 24; secShift = 24; terShift = 24; 
6349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate 6350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) { 6351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while(*UCharOffset != 0xFFFF); 6355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break;; 6357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SPEC_PROC_TAG: 6358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 0xB7 is a precontext character defined in UCA5.1, a special 6360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // handle is implemeted in order to save LatinOne table for 6361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // most locales. 
6362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ch==0xb7) { 6363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift); 6364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 6366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: 6371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // compact table 6376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(contractionOffset < coll->latinOneTableLen) { 6377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) { 6378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(it); 6382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 6383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querucleanup_after_failure: 6385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // status should already be set before arriving here. 
6386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneFailed = TRUE; 6387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(it); 6388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 6389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid ucol_updateInternalState(UCollator *coll, UErrorCode *status) { 6392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 6393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseFirst == UCOL_UPPER_FIRST) { 6394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->caseSwitch = UCOL_CASE_SWITCH; 6395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->caseSwitch = UCOL_NO_CASE_SWITCH; 6397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseLevel == UCOL_ON || coll->caseFirst == UCOL_OFF) { 6400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryMask = UCOL_REMOVE_CASE; 6401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryCommon = UCOL_COMMON3_NORMAL; 6402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF; /* Should be 0x80 */ 6403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF; 6404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryBottom = UCOL_COMMON_BOT3; 6405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryMask = 
UCOL_KEEP_CASE; 6407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON; 6408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseFirst == UCOL_UPPER_FIRST) { 6409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST; 6410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER; 6411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER; 6412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryCommon = UCOL_COMMON3_NORMAL; 6414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER; 6415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER; 6416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Set the compression values */ 6420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - coll->tertiaryBottom - 1); 6421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*tertiaryTotal); /* we multilply double with int, but need only int */ 6422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopCount); 6423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY 
6425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == UCOL_NON_IGNORABLE) 6426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->sortKeyGen = ucol_calcSortKeySimpleTertiary; 6428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->sortKeyGen = ucol_calcSortKey; 6430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && coll->numericCollation == UCOL_OFF 6432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed) 6433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) { 6435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it 6436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "F"); 6437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneUse = TRUE; 6438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneUse = FALSE; 6440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(*status == U_UNSUPPORTED_ERROR) { 6442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ZERO_ERROR; 6443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // latin1Table exists and it 
doesn't need to be regenerated, just use it 6445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneUse = TRUE; 6446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneUse = FALSE; 6449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 6454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) { 6455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(len == -1) { 6459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru len = u_strlen(varTop); 6460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(len == 0) { 6462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate s; 646750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, varTop, len, &s, status); 646850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 
646950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 647050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 6471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t CE = ucol_IGetNextCE(coll, &s, status); 6473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* here we check if we have consumed all characters */ 6475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* you can put in either one character or a contraction */ 6476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* you shouldn't put more... */ 6477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(s.pos != s.endp || CE == UCOL_NO_MORE_CES) { 6478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_CE_NOT_FOUND_ERROR; 6479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t nextCE = ucol_IGetNextCE(coll, &s, status); 6483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) { 6485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_PRIMARY_TOO_LONG_ERROR; 6486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) { 6489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->variableTopValueisDefault = FALSE; 6490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->variableTopValue = (CE & 
UCOL_PRIMARYMASK)>>16; 6491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* To avoid memory leak, free the offset buffer if necessary. */ 6494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_freeOffsetBuffer(&s); 6495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE & UCOL_PRIMARYMASK; 6497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) { 6500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return coll->variableTopValue<<16; 6504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 6507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) { 6508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 6510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) { 
6513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->variableTopValueisDefault = FALSE; 6514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16; 6515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Attribute setter API */ 6518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 6519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) { 6520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 6522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6523b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColAttributeValue oldFrench = coll->frenchCollation; 6525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColAttributeValue oldCaseFirst = coll->caseFirst; 6526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(attr) { 6527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_NUMERIC_COLLATION: /* sort substrings of digits as numbers */ 6528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(value == UCOL_ON) { 6529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollation = UCOL_ON; 6530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollationisDefault = FALSE; 6531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollation = UCOL_OFF; 6533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
coll->numericCollationisDefault = FALSE; 6534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollationisDefault = TRUE; 6536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollation = (UColAttributeValue)coll->options->numericCollation; 6537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_HIRAGANA_QUATERNARY_MODE: /* special quaternary values for Hiragana */ 6542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(value == UCOL_ON) { 6543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQ = UCOL_ON; 6544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQisDefault = FALSE; 6545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQ = UCOL_OFF; 6547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQisDefault = FALSE; 6548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQisDefault = TRUE; 6550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQ = (UColAttributeValue)coll->options->hiraganaQ; 6551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
6554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/ 6556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_ON) { 6557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollation = UCOL_ON; 6558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollationisDefault = FALSE; 6559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollation = UCOL_OFF; 6561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollationisDefault = FALSE; 6562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollationisDefault = TRUE; 6564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollation = (UColAttributeValue)coll->options->frenchCollation; 6565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 6567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ 6570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_SHIFTED) { 6571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandling = UCOL_SHIFTED; 6572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandlingisDefault = FALSE; 6573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_NON_IGNORABLE) 
{ 6574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandling = UCOL_NON_IGNORABLE; 6575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandlingisDefault = FALSE; 6576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandlingisDefault = TRUE; 6578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandling = (UColAttributeValue)coll->options->alternateHandling ; 6579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 6581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ 6584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_LOWER_FIRST) { 6585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirst = UCOL_LOWER_FIRST; 6586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirstisDefault = FALSE; 6587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_UPPER_FIRST) { 6588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirst = UCOL_UPPER_FIRST; 6589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirstisDefault = FALSE; 6590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->caseFirst = UCOL_OFF; 6592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->caseFirstisDefault = FALSE; 6593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 
6594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirst = (UColAttributeValue)coll->options->caseFirst; 6595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirstisDefault = TRUE; 6596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 6598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_CASE_LEVEL: /* do we have an extra case level */ 6601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_ON) { 6602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevel = UCOL_ON; 6603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevelisDefault = FALSE; 6604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevel = UCOL_OFF; 6606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevelisDefault = FALSE; 6607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevel = (UColAttributeValue)coll->options->caseLevel; 6609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevelisDefault = TRUE; 6610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 6612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ 
6615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_ON) { 6616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationMode = UCOL_ON; 6617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationModeisDefault = FALSE; 661827f654740f2a26ad62a5c155af9199af9e69b889claireho initializeFCD(status); 6619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationMode = UCOL_OFF; 6621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationModeisDefault = FALSE; 6622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationModeisDefault = TRUE; 6624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationMode = (UColAttributeValue)coll->options->normalizationMode; 662527f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->normalizationMode == UCOL_ON) { 662627f654740f2a26ad62a5c155af9199af9e69b889claireho initializeFCD(status); 662727f654740f2a26ad62a5c155af9199af9e69b889claireho } 6628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 6630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_STRENGTH: /* attribute for strength */ 6633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (value == UCOL_DEFAULT) { 6634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->strengthisDefault = TRUE; 6635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->strength = (UColAttributeValue)coll->options->strength; 
6636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value <= UCOL_IDENTICAL) { 6637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->strengthisDefault = FALSE; 6638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->strength = value; 6639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 6641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_ATTRIBUTE_COUNT: 6644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 6645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) { 6649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneRegenTable = TRUE; 6650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneRegenTable = FALSE; 6652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_updateInternalState(coll, status); 6654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UColAttributeValue U_EXPORT2 6657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) { 
6658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_DEFAULT; 6660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(attr) { 6662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_NUMERIC_COLLATION: 6663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->numericCollation; 6664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_HIRAGANA_QUATERNARY_MODE: 6665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->hiraganaQ; 6666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/ 6667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->frenchCollation; 6668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ 6669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->alternateHandling; 6670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ 6671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->caseFirst; 6672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_CASE_LEVEL: /* do we have an extra case level */ 6673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->caseLevel; 6674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ 6675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->normalizationMode; 6676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_STRENGTH: /* attribute for strength */ 
6677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->strength; 6678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_ATTRIBUTE_COUNT: 6679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 6680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_DEFAULT; 6684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 6687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setStrength( UCollator *coll, 6688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationStrength strength) 6689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 6690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 6691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); 6692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationStrength U_EXPORT2 6695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getStrength(const UCollator *coll) 6696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 6697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 6698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return ucol_getAttribute(coll, UCOL_STRENGTH, &status); 6699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
6700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6701b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_DRAFT int32_t U_EXPORT2 670227f654740f2a26ad62a5c155af9199af9e69b889clairehoucol_getReorderCodes(const UCollator *coll, 670327f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t *dest, 670427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t destCapacity, 6705b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode *status) { 6706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(*status)) { 670727f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 670827f654740f2a26ad62a5c155af9199af9e69b889claireho } 6709b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 671027f654740f2a26ad62a5c155af9199af9e69b889claireho if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { 6711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *status = U_ILLEGAL_ARGUMENT_ERROR; 671227f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 671327f654740f2a26ad62a5c155af9199af9e69b889claireho } 6714b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6715b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#ifdef UCOL_DEBUG 6716b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho printf("coll->reorderCodesLength = %d\n", coll->reorderCodesLength); 6717b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho printf("coll->defaultReorderCodesLength = %d\n", coll->defaultReorderCodesLength); 6718b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif 6719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 672027f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->reorderCodesLength > destCapacity) { 6721b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *status = U_BUFFER_OVERFLOW_ERROR; 672227f654740f2a26ad62a5c155af9199af9e69b889claireho return coll->reorderCodesLength; 672327f654740f2a26ad62a5c155af9199af9e69b889claireho } 672427f654740f2a26ad62a5c155af9199af9e69b889claireho for (int32_t i = 0; i < coll->reorderCodesLength; i++) { 
672527f654740f2a26ad62a5c155af9199af9e69b889claireho dest[i] = coll->reorderCodes[i]; 672627f654740f2a26ad62a5c155af9199af9e69b889claireho } 672727f654740f2a26ad62a5c155af9199af9e69b889claireho return coll->reorderCodesLength; 672827f654740f2a26ad62a5c155af9199af9e69b889claireho} 672927f654740f2a26ad62a5c155af9199af9e69b889claireho 6730b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_DRAFT void U_EXPORT2 6731b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoucol_setReorderCodes(UCollator* coll, 6732b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const int32_t* reorderCodes, 673327f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t reorderCodesLength, 6734b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode *status) { 6735b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(*status)) { 673627f654740f2a26ad62a5c155af9199af9e69b889claireho return; 673727f654740f2a26ad62a5c155af9199af9e69b889claireho } 673827f654740f2a26ad62a5c155af9199af9e69b889claireho 673927f654740f2a26ad62a5c155af9199af9e69b889claireho if (reorderCodesLength < 0 || (reorderCodesLength > 0 && reorderCodes == NULL)) { 6740b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *status = U_ILLEGAL_ARGUMENT_ERROR; 674127f654740f2a26ad62a5c155af9199af9e69b889claireho return; 674227f654740f2a26ad62a5c155af9199af9e69b889claireho } 674327f654740f2a26ad62a5c155af9199af9e69b889claireho 6744b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) { 6745b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(coll->reorderCodes); 6746b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 674727f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodes = NULL; 674827f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodesLength = 0; 674927f654740f2a26ad62a5c155af9199af9e69b889claireho if (reorderCodesLength == 0) { 6750b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (coll->leadBytePermutationTable != NULL && 
coll->freeLeadBytePermutationTableOnClose == TRUE) { 6751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(coll->leadBytePermutationTable); 6752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 675327f654740f2a26ad62a5c155af9199af9e69b889claireho coll->leadBytePermutationTable = NULL; 675427f654740f2a26ad62a5c155af9199af9e69b889claireho return; 675527f654740f2a26ad62a5c155af9199af9e69b889claireho } 675627f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodes = (int32_t*) uprv_malloc(reorderCodesLength * sizeof(int32_t)); 675727f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->reorderCodes == NULL) { 6758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *status = U_MEMORY_ALLOCATION_ERROR; 675927f654740f2a26ad62a5c155af9199af9e69b889claireho return; 676027f654740f2a26ad62a5c155af9199af9e69b889claireho } 6761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho coll->freeReorderCodesOnClose = TRUE; 676227f654740f2a26ad62a5c155af9199af9e69b889claireho for (int32_t i = 0; i < reorderCodesLength; i++) { 676327f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodes[i] = reorderCodes[i]; 676427f654740f2a26ad62a5c155af9199af9e69b889claireho } 676527f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodesLength = reorderCodesLength; 6766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ucol_buildPermutationTable(coll, status); 6767b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 6768b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_DRAFT int32_t U_EXPORT2 6770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoucol_getEquivalentReorderCodes(int32_t reorderCode, 6771b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t* dest, 6772b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t destCapacity, 6773b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode *pErrorCode) { 6774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bool equivalentCodesSet[USCRIPT_CODE_LIMIT]; 
6775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint16_t leadBytes[256]; 6776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int leadBytesCount; 6777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int leadByteIndex; 6778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int16_t reorderCodesForLeadByte[USCRIPT_CODE_LIMIT]; 6779b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int reorderCodesForLeadByteCount; 6780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int reorderCodeIndex; 6781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6782b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t equivalentCodesCount = 0; 6783b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int setIndex; 6784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 678527f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(*pErrorCode)) { 6786b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 6787b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6788b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6789b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { 6790b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 6791b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 6792b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6793b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6794b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memset(equivalentCodesSet, 0, USCRIPT_CODE_LIMIT * sizeof(bool)); 6795b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6796b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UCollator* uca = ucol_initUCA(pErrorCode); 6797b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(*pErrorCode)) { 6798b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 6799b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6800b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho leadBytesCount = ucol_getLeadBytesForReorderCode(uca, reorderCode, leadBytes, 
256); 6801b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (leadByteIndex = 0; leadByteIndex < leadBytesCount; leadByteIndex++) { 6802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho reorderCodesForLeadByteCount = ucol_getReorderCodesForLeadByte( 6803b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uca, leadBytes[leadByteIndex], reorderCodesForLeadByte, USCRIPT_CODE_LIMIT); 6804b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodesForLeadByteCount; reorderCodeIndex++) { 6805b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho equivalentCodesSet[reorderCodesForLeadByte[reorderCodeIndex]] = true; 6806b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6807b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6809b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) { 6810b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (equivalentCodesSet[setIndex] == true) { 6811b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho equivalentCodesCount++; 6812b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6814b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6815b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (destCapacity == 0) { 6816b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return equivalentCodesCount; 6817b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 6819b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho equivalentCodesCount = 0; 6820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) { 6821b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (equivalentCodesSet[setIndex] == true) { 6822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho dest[equivalentCodesCount++] = setIndex; 6823b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if 
(equivalentCodesCount >= destCapacity) { 6824b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 6825b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6827b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 6828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return equivalentCodesCount; 682927f654740f2a26ad62a5c155af9199af9e69b889claireho} 683027f654740f2a26ad62a5c155af9199af9e69b889claireho 683127f654740f2a26ad62a5c155af9199af9e69b889claireho 6832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 6833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are misc functions */ 6834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there are new APIs and some compatibility APIs */ 6835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 6836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 6838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getVersion(const UCollator* coll, 6839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVersionInfo versionInfo) 6840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 6841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* RunTime version */ 6842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t rtVersion = UCOL_RUNTIME_VERSION; 6843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Builder version*/ 6844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t bdVersion = coll->image->version[0]; 6845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Charset Version. 
Need to get the version from cnv files 6847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * makeconv should populate cnv files with version and 6848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an api has to be provided in ucnv.h to obtain this version 6849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 6850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t csVersion = 0; 6851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* combine the version info */ 6853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t cmbVersion = (uint16_t)((rtVersion<<11) | (bdVersion<<6) | (csVersion)); 6854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Tailoring rules */ 6856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru versionInfo[0] = (uint8_t)(cmbVersion>>8); 6857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru versionInfo[1] = (uint8_t)cmbVersion; 6858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru versionInfo[2] = coll->image->version[1]; 6859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->UCA) { 6860b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* Include the minor number when getting the UCA version. 
(major & 1f) << 3 | (minor & 7) */ 6861b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru versionInfo[3] = (coll->UCA->image->UCAVersion[0] & 0x1f) << 3 | (coll->UCA->image->UCAVersion[1] & 0x07); 6862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru versionInfo[3] = 0; 6864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This internal API checks whether a character is tailored or not */ 6869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 6870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) { 6871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL || coll == coll->UCA) { 6872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 6873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t CE = UCOL_NOT_FOUND; 6876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *ContractionStart = NULL; 6877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(u < 0x100) { /* latin-1 */ 6878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = coll->latinOneMapping[u]; 6879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) { 6880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 6881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
6882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* regular */ 6883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u); 6884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isContraction(CE)) { 6887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ContractionStart = (UChar *)coll->image+getContractOffset(CE); 6888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex)); 6889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(CE != UCOL_NOT_FOUND); 6892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 6896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the string compare functions */ 6897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 6898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 6899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_checkIdent internal function. Does byte level string compare. 
*/ 6902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Used by strcoll if strength == identical and strings */ 690350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* are otherwise equal. */ 6904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 6905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Comparison must be done on NFD normalized strings. */ 6906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* FCD is not good enough. */ 6907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 6909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status) 6910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 691150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // When we arrive here, we can have normal strings or UCharIterators. Currently they are both 691250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of same type, but that doesn't really mean that it will stay that way. 6913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t comparison; 6914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sColl->flags & UCOL_USE_ITERATOR) { 691650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The division for the array length may truncate the array size to 691750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high 691850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // for all platforms anyway. 
691950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 692050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 6921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UNormIterator *sNIt = NULL, *tNIt = NULL; 6922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status); 6923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status); 6924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl->iterator->move(sColl->iterator, 0, UITER_START); 6925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl->iterator->move(tColl->iterator, 0, UITER_START); 6926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, status); 6927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, status); 6928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru comparison = u_strCompareIter(sIt, tIt, TRUE); 6929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(sNIt); 6930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(tNIt); 6931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 693250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sLen = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp - sColl->string) : -1; 693350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *sBuf = sColl->string; 693450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t tLen = (tColl->flags & UCOL_ITER_HASLEN) ? 
(int32_t)(tColl->endp - tColl->string) : -1; 693550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *tBuf = tColl->string; 6936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (normalize) { 6938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ZERO_ERROR; 693950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note: We could use Normalizer::compare() or similar, but for short strings 694050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // which may not be in FCD it might be faster to just NFD them. 694150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather than 694250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // NFD'ing immediately might be faster for long strings, 694350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // but string comparison is usually done on relatively short strings. 694450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN) == 0, sBuf, sLen), 694550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sColl->writableBuffer, 694650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status); 694750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN) == 0, tBuf, tLen), 694850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tColl->writableBuffer, 694950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status); 695050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 695150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_LESS; 6952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 695350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writableBuffer); 6954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
695550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE); 6956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (comparison < 0) { 6960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_LESS; 6961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (comparison == 0) { 6962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_EQUAL; 6963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* comparison > 0 */ { 6964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_GREATER; 6965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* CEBuf - A struct and some inline functions to handle the saving */ 6969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* of CEs in a buffer within ucol_strcoll */ 6970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCOL_CEBUF_SIZE 512 6972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct ucol_CEBuf { 6973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *buf; 6974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *endp; 6975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *pos; 6976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t localArray[UCOL_CEBUF_SIZE]; 6977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} ucol_CEBuf; 6978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
6979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 6981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void UCOL_INIT_CEBUF(ucol_CEBuf *b) { 6982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (b)->buf = (b)->pos = (b)->localArray; 6983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (b)->endp = (b)->buf + UCOL_CEBUF_SIZE; 6984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 6987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) { 6988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t oldSize; 6989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newSize; 6990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *newBuf; 6991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ci->flags |= UCOL_ITER_ALLOCATED; 699350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldSize = (uint32_t)(b->pos - b->buf); 6994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newSize = oldSize * 2; 6995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t)); 6996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newBuf == NULL) { 6997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 6998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 7000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t)); 
7001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (b->buf != b->localArray) { 7002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(b->buf); 7003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru b->buf = newBuf; 7005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru b->endp = b->buf + newSize; 7006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru b->pos = b->buf + oldSize; 7007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 7011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruinline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCode *status) { 7012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b->pos == b->endp) { 7013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_CEBuf_Expand(b, ci, status); 7014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_SUCCESS(*status)) { 7016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(b)->pos++ = ce; 7017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is a trick string compare function that goes in and uses sortkeys to compare */ 7021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* It is used when compare gets in trouble and needs to bail out */ 7022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UCollationResult ucol_compareUsingSortKeys(collIterate *sColl, 
7023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate *tColl, 7024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 7025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 7026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER]; 7027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *sourceKeyP = sourceKey; 7028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *targetKeyP = targetKey; 7029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceKeyLen = UCOL_MAX_BUFFER, targetKeyLen = UCOL_MAX_BUFFER; 7030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *coll = sColl->coll; 703150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *source = NULL; 703250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *target = NULL; 7033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = UCOL_EQUAL; 703450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString sourceString, targetString; 703550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sourceLength; 703650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t targetLength; 7037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sColl->flags & UCOL_USE_ITERATOR) { 7039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sColl->iterator->move(sColl->iterator, 0, UITER_START); 7040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tColl->iterator->move(tColl->iterator, 0, UITER_START); 704150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 704250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((c=sColl->iterator->next(sColl->iterator))>=0) { 704350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sourceString.append((UChar)c); 
704450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 704550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((c=tColl->iterator->next(tColl->iterator))>=0) { 704650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho targetString.append((UChar)c); 704750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 704850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source = sourceString.getBuffer(); 704950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sourceLength = sourceString.length(); 705050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho target = targetString.getBuffer(); 705150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho targetLength = targetString.length(); 7052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // no iterators 705350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sColl->string):-1; 705450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tColl->string):-1; 7055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = sColl->string; 7056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru target = tColl->string; 7057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen); 7062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sourceKeyLen > UCOL_MAX_BUFFER) { 7063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sourceKeyP = (uint8_t*)uprv_malloc(sourceKeyLen*sizeof(uint8_t)); 7064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sourceKeyP == NULL) { 7065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
*status = U_MEMORY_ALLOCATION_ERROR; 7066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto cleanup_and_do_compare; 7067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen); 7069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen); 7072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(targetKeyLen > UCOL_MAX_BUFFER) { 7073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targetKeyP = (uint8_t*)uprv_malloc(targetKeyLen*sizeof(uint8_t)); 7074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(targetKeyP == NULL) { 7075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 7076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto cleanup_and_do_compare; 7077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen); 7079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = uprv_strcmp((const char*)sourceKeyP, (const char*)targetKeyP); 7082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querucleanup_and_do_compare: 7084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sourceKeyP != NULL && sourceKeyP != sourceKey) { 7085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(sourceKeyP); 
7086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(targetKeyP != NULL && targetKeyP != targetKey) { 7089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(targetKeyP); 7090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result<0) { 7093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_LESS; 7094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(result>0) { 7095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_GREATER; 7096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_EQUAL; 7098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 710250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult 710350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status) 7104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 7105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ALIGN_CODE(16); 7106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *coll = sColl->coll; 7108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // setting up the collator parameters 
7111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColAttributeValue strength = coll->strength; 7112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool initialCheckSecTer = (strength >= UCOL_SECONDARY); 7113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkSecTer = initialCheckSecTer; 7115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkTertiary = (strength >= UCOL_TERTIARY); 7116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkQuad = (strength >= UCOL_QUATERNARY); 7117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkIdent = (strength == UCOL_IDENTICAL); 7118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkCase = (coll->caseLevel == UCOL_ON); 7119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer; 7120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool shifted = (coll->alternateHandling == UCOL_SHIFTED); 7121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool qShifted = shifted && checkQuad; 7122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad; 7123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(doHiragana && shifted) { 7125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (ucol_compareUsingSortKeys(sColl, tColl, status)); 7126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseSwitch = coll->caseSwitch; 7128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryMask = coll->tertiaryMask; 7129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
7130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is the lowest primary value that will not be ignored if shifted 7131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0; 7132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationResult result = UCOL_EQUAL; 7134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationResult hirResult = UCOL_EQUAL; 7135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Preparing the CE buffers. They will be filled during the primary phase 7137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_CEBuf sCEs; 7138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_CEBuf tCEs; 7139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_INIT_CEBUF(&sCEs); 7140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_INIT_CEBUF(&tCEs); 7141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t secS = 0, secT = 0; 7143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sOrder=0, tOrder=0; 7144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Non shifted primary processing is quite simple 7146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!shifted) { 7147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 7148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We fetch CEs until we hit a non ignorable primary or end. 
7150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 7151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We get the next CE 7152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_IGetNextCE(coll, sColl, status); 7153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Stuff it in the buffer 7154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // And keep just the primary part. 7156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sOrder &= UCOL_PRIMARYMASK; 7157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while(sOrder == 0); 7158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // see the comments on the above block 7160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 7161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_IGetNextCE(coll, tColl, status); 7162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder &= UCOL_PRIMARYMASK; 7164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while(tOrder == 0); 7165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if both primaries are the same 7167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 7168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // and there are no more CEs, we advance to the next level 7169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == UCOL_NO_MORE_CES_PRIMARY) { 7170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
7172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(doHiragana && hirResult == UCOL_EQUAL) { 7173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) { 7174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA)) 7175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ? UCOL_LESS:UCOL_GREATER; 7176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 717927f654740f2a26ad62a5c155af9199af9e69b889claireho // only need to check one for continuation 718027f654740f2a26ad62a5c155af9199af9e69b889claireho // if one is then the other must be or the preceding CE would be a prefix of the other 718127f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->leadBytePermutationTable != NULL && !isContinuation(sOrder)) { 718227f654740f2a26ad62a5c155af9199af9e69b889claireho sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF); 718327f654740f2a26ad62a5c155af9199af9e69b889claireho tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF); 718427f654740f2a26ad62a5c155af9199af9e69b889claireho } 7185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if two primaries are different, we are done 7186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (sOrder < tOrder) ? UCOL_LESS: UCOL_GREATER; 7187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } // no primary difference... 
do the rest from the buffers 7190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // shifted - do a slightly more complicated processing :) 7191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 7192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool sInShifted = FALSE; 7193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool tInShifted = FALSE; 7194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This version of code can be refactored. However, it seems easier to understand this way. 7195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Source loop. Sam as the target loop. 7196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_IGetNextCE(coll, sColl, status); 7198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == UCOL_NO_MORE_CES) { 7199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) { 7202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* UCA amendment - ignore ignorables that follow shifted code points */ 7203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(isContinuation(sOrder)) { 7205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */ 7206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sInShifted) { 7207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */ 
7208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* Just lower level values */ 7215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sInShifted) { 7216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* regular */ 722327f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->leadBytePermutationTable != NULL){ 722427f654740f2a26ad62a5c155af9199af9e69b889claireho sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF); 722527f654740f2a26ad62a5c155af9199af9e69b889claireho } 7226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sOrder & UCOL_PRIMARYMASK) > LVT) { 7227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sOrder & UCOL_PRIMARYMASK) > 0) { 
7231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = TRUE; 7232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder &= UCOL_PRIMARYMASK; 7233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = FALSE; 7238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder &= UCOL_PRIMARYMASK; 7244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = FALSE; 7245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_IGetNextCE(coll, tColl, status); 7248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder == UCOL_NO_MORE_CES) { 7249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) { 7252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* UCA amendment - ignore ignorables that follow shifted code points */ 
7253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(isContinuation(tOrder)) { 7255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */ 7256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tInShifted) { 7257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */ 7258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* Just lower level values */ 7265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tInShifted) { 7266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* regular */ 727327f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->leadBytePermutationTable != NULL){ 727427f654740f2a26ad62a5c155af9199af9e69b889claireho tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 
0x00FFFFFF); 727527f654740f2a26ad62a5c155af9199af9e69b889claireho } 7276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tOrder & UCOL_PRIMARYMASK) > LVT) { 7277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tOrder & UCOL_PRIMARYMASK) > 0) { 7281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = TRUE; 7282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder &= UCOL_PRIMARYMASK; 7283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = FALSE; 7288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder &= UCOL_PRIMARYMASK; 7294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = FALSE; 7295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 7297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 7298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(doHiragana && hirResult == UCOL_EQUAL) { 
7299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_HIRAGANA)) { 7300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA)) 7301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ? UCOL_LESS:UCOL_GREATER; 7302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 7305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == UCOL_NO_MORE_CES_PRIMARY) { 7306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = 0; 7309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = 0; 7310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER; 7314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } /* no primary difference... 
do the rest from the buffers */ 7317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* now, we're gonna reexamine collected CEs */ 7320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *sCE; 7321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *tCE; 7322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* This is the secondary level of comparison */ 7324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(checkSecTer) { 7325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isFrenchSec) { /* normal */ 7326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.buf; 7327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.buf; 7328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (secS == 0) { 7330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE++) & UCOL_SECONDARYMASK; 7331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(secT == 0) { 7334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE++) & UCOL_SECONDARYMASK; 7335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == secT) { 7338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == UCOL_NO_MORE_CES_SECONDARY) { 7339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
7341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; secT = 0; 7342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (secS < secT) ? UCOL_LESS : UCOL_GREATER; 7346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* do the French */ 7350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *sCESave = NULL; 7351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *tCESave = NULL; 7352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.pos-2; /* this could also be sCEs-- if needs to be optimized */ 7353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.pos-2; 7354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (secS == 0 && sCE >= sCEs.buf) { 735627f654740f2a26ad62a5c155af9199af9e69b889claireho if(sCESave == NULL) { 7357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE--); 7358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(secS)) { 7359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(isContinuation(secS = *(sCE--))) 7360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ; 7361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* after this, secS has the start of continuation, and sCEs points before that */ 7362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCESave = sCE; /* we save it, so that we know where to come back AND that 
we need to go forward */ 7363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE+=2; /* need to point to the first continuation CP */ 7364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* However, now you can just continue doing stuff */ 7365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE++); 7368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(secS)) { /* This means we have finished with this cont */ 7369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCESave; /* reset the pointer to before continuation */ 737027f654740f2a26ad62a5c155af9199af9e69b889claireho sCESave = NULL; 737127f654740f2a26ad62a5c155af9199af9e69b889claireho secS = 0; /* Fetch a fresh CE before the continuation sequence. */ 7372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS &= UCOL_SECONDARYMASK; /* remove the continuation bit */ 7376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(secT == 0 && tCE >= tCEs.buf) { 737927f654740f2a26ad62a5c155af9199af9e69b889claireho if(tCESave == NULL) { 7380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE--); 7381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(secT)) { 7382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(isContinuation(secT = *(tCE--))) 7383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ; 7384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* after this, 
secS has the start of continuation, and sCEs points before that */ 7385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCESave = tCE; /* we save it, so that we know where to come back AND that we need to go forward */ 7386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE+=2; /* need to point to the first continuation CP */ 7387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* However, now you can just continue doing stuff */ 7388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE++); 7391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(secT)) { /* This means we have finished with this cont */ 7392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCESave; /* reset the pointer to before continuation */ 739327f654740f2a26ad62a5c155af9199af9e69b889claireho tCESave = NULL; 739427f654740f2a26ad62a5c155af9199af9e69b889claireho secT = 0; /* Fetch a fresh CE before the continuation sequence. 
*/ 7395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT &= UCOL_SECONDARYMASK; /* remove the continuation bit */ 7399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == secT) { 7402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == UCOL_NO_MORE_CES_SECONDARY || (sCE < sCEs.buf && tCE < tCEs.buf)) { 7403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; secT = 0; 7406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (secS < secT) ? 
UCOL_LESS : UCOL_GREATER; 7410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* doing the case bit */ 7417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(checkCase) { 7418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.buf; 7419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.buf; 7420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((secS & UCOL_REMOVE_CASE) == 0) { 7422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(*sCE++)) { 7423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS =*(sCE-1); 7424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((secS & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) { 7425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // primary ignorables should not be considered on the case level when the strength is primary 7426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise, the CEs stop being well-formed 7427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS &= UCOL_TERT_CASE_MASK; 7428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS ^= caseSwitch; 7429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
7433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((secT & UCOL_REMOVE_CASE) == 0) { 7438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(*tCE++)) { 7439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE-1); 7440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((secT & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) { 7441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // primary ignorables should not be considered on the case level when the strength is primary 7442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise, the CEs stop being well-formed 7443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT &= UCOL_TERT_CASE_MASK; 7444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT ^= caseSwitch; 7445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) { 7454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = UCOL_LESS; 7455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 
7456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK)) { 7457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = UCOL_GREATER; 7458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY || (secT & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) { 7462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Tertiary level */ 7471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(checkTertiary) { 7472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.buf; 7475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.buf; 7476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((secS & UCOL_REMOVE_CASE) == 0) { 7478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE++) & tertiaryMask; 7479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(!isContinuation(secS)) { 7480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS ^= caseSwitch; 7481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS &= UCOL_REMOVE_CASE; 7483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((secT & UCOL_REMOVE_CASE) == 0) { 7487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE++) & tertiaryMask; 7488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(secT)) { 7489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT ^= caseSwitch; 7490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT &= UCOL_REMOVE_CASE; 7492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == secT) { 7496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((secS & UCOL_REMOVE_CASE) == 1) { 7497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; secT = 0; 7500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (secS < secT) ? 
UCOL_LESS : UCOL_GREATER; 7504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(qShifted /*checkQuad*/) { 7511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool sInShifted = TRUE; 7512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool tInShifted = TRUE; 7513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.buf; 7516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.buf; 7517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 751827f654740f2a26ad62a5c155af9199af9e69b889claireho while((secS == 0 && secS != UCOL_NO_MORE_CES) || (isContinuation(secS) && !sInShifted)) { 7519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE++); 7520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(secS)) { 7521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!sInShifted) { 7522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(secS > LVT || (secS & UCOL_PRIMARYMASK) == 0) { /* non continuation */ 7525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = UCOL_PRIMARYMASK; 7526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = FALSE; 
7527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = TRUE; 7529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS &= UCOL_PRIMARYMASK; 7532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 753427f654740f2a26ad62a5c155af9199af9e69b889claireho while((secT == 0 && secT != UCOL_NO_MORE_CES) || (isContinuation(secT) && !tInShifted)) { 7535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE++); 7536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(secT)) { 7537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!tInShifted) { 7538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(secT > LVT || (secT & UCOL_PRIMARYMASK) == 0) { 7541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = UCOL_PRIMARYMASK; 7542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = FALSE; 7543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = TRUE; 7545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT &= UCOL_PRIMARYMASK; 7548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == secT) { 7550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == UCOL_NO_MORE_CES_PRIMARY) { 
7551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; secT = 0; 7554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (secS < secT) ? UCOL_LESS : UCOL_GREATER; 7558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(doHiragana && hirResult != UCOL_EQUAL) { 7562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If we're fine on quaternaries, we might be different 7563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // on Hiragana. This, however, might fail us in shifted. 7564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = hirResult; 7565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* For IDENTICAL comparisons, we use a bitwise character comparison */ 7569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* as a tiebreaker if all else is equal. */ 7570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Getting here should be quite rare - strings are not identical - */ 7571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* that is checked first, but compared == through all other checks. 
*/ 7572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(checkIdent) 7573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 7574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //result = ucol_checkIdent(&sColl, &tColl, coll->normalizationMode == UCOL_ON); 7575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ucol_checkIdent(sColl, tColl, TRUE, status); 7576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucommonReturn: 7579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((sColl->flags | tColl->flags) & UCOL_ITER_ALLOCATED) { 7580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sCEs.buf != sCEs.localArray ) { 7581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(sCEs.buf); 7582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (tCEs.buf != tCEs.localArray ) { 7584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(tCEs.buf); 7585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 7589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 759150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult 759250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoucol_strcollRegular(const UCollator *coll, 759350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *source, int32_t sourceLength, 759450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *target, int32_t targetLength, 759550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
UErrorCode *status) { 759650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collIterate sColl, tColl; 759750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Preparing the context objects for iterating over strings 759850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, source, sourceLength, &sColl, status); 759950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, target, targetLength, &tColl, status); 760050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 760150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_LESS; 760250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 760350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(&sColl, &tColl, status); 760450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 7605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline uint32_t 7607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getLatinOneContraction(const UCollator *coll, int32_t strength, 7608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t CE, const UChar *s, int32_t *index, int32_t len) 7609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 7610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF); 7611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t latinOneOffset = (CE & 0x00FFF000) >> 12; 7612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t offset = 1; 7613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar = 0, tchar = 0; 7614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(len == -1) { 7617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(s[*index] == 0) { // end of string 7618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); 7619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = s[*index]; 7621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(*index == len) { 7624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); 7625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = s[*index]; 7627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(schar > (tchar = *(UCharOffset+offset))) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ 7631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru offset++; 7632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (schar == tchar) { 7635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (*index)++; 7636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset+offset]); 7637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 7639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 
7640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(schar & 0xFF00 /*> UCOL_ENDOFLATIN1RANGE*/) { 7641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_BAIL_OUT_CE; 7642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // skip completely ignorables 7644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar); 7645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isZeroCE == 0) { // we have to ignore completely ignorables 7646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (*index)++; 7647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); 7651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 7657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is a fast strcoll, geared towards text in Latin-1. 7658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It supports contractions of size two, French secondaries 7659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and case switching. You can use it with strengths primary 7660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to tertiary. It does not support shifted and case level. 
7661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It relies on the table build by setupLatin1Table. If it 7662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * doesn't understand something, it will go to the regular 7663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * strcoll. 7664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 766550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult 7666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcollUseLatin1( const UCollator *coll, 7667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 7668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sLen, 7669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 7670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tLen, 7671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 7672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 7673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ALIGN_CODE(16); 7674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t strength = coll->strength; 7675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sIndex = 0, tIndex = 0; 7677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar sChar = 0, tChar = 0; 7678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sOrder=0, tOrder=0; 7679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool endOfSource = FALSE; 7681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *elements = coll->latinOneCEs; 7683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
7684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool haveContractions = FALSE; // if we have contractions in our string 7685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we cannot do French secondary 7686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do the primary level 7688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 7689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(sOrder==0) { // this loop skips primary ignorables 7690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // sOrder=getNextlatinOneCE(source); 7691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sLen==-1) { // handling zero terminated strings 7692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[sIndex++]; 7693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sChar==0) { 7694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 7695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // handling strings with known length 7698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIndex==sLen) { 7699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 7700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[sIndex++]; 7703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32) 7705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
//fprintf(stderr, "R"); 770650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 7707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sOrder = elements[sChar]; 7709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder >= UCOL_NOT_FOUND) { // if we got a special 7710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // specials can basically be either contractions or bail-out signs. If we get anything 7711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // else, we'll bail out anywasy 7712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(getCETag(sOrder) == CONTRACTION_TAG) { 7713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOrder, source, &sIndex, sLen); 7714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru haveContractions = TRUE; // if there are contractions, we cannot do French secondary 7715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // However, if there are contractions in the table, but we always use just one char, 7716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we might be able to do French. This should be checked out. 
7717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { 7719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "S"); 772050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 7721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(tOrder==0) { // this loop skips primary ignorables 7726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // tOrder=getNextlatinOneCE(target); 7727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tLen==-1) { // handling zero terminated strings 7728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[tIndex++]; 7729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tChar==0) { 7730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { // this is different than source loop, 7731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // as we already know that source loop is done here, 7732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // so we can either finish the primary loop if both 7733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // strings are done or anounce the result if only 7734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // target is done. Same below. 
7735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto endOfPrimLoop; 7736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // handling strings with known length 7741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tIndex==tLen) { 7742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 7743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto endOfPrimLoop; 7744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[tIndex++]; 7749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32) 7751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "R"); 775250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 7753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tOrder = elements[tChar]; 7755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder >= UCOL_NOT_FOUND) { 7756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Handling specials, see the comments for source 7757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(getCETag(tOrder) == CONTRACTION_TAG) { 7758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOrder, target, &tIndex, tLen); 7759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru haveContractions = TRUE; 7760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { 7762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "S"); 776350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 7764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { // source is finished, but target is not, say the result. 
7768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 7769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { // if we have same CEs, we continue the loop 7772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sOrder = 0; tOrder = 0; 7773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 7774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // compare current top bytes 7776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(((sOrder^tOrder)&0xFF000000)!=0) { 7777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // top bytes differ, return difference 7778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder < tOrder) { 7779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 7780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder > tOrder) { 7781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24); 7784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // since we must return enum value 7785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // top bytes match, continue with following bytes 7788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sOrder<<=8; 7789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tOrder<<=8; 7790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
7791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruendOfPrimLoop: 7794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // after primary loop, we definitely know the sizes of strings, 7795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // so we set it and use simpler loop for secondaries and tertiaries 7796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sLen = sIndex; tLen = tIndex; 7797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_SECONDARY) { 7798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // adjust the table beggining 7799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elements += coll->latinOneTableLen; 7800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = FALSE; 7801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->frenchCollation == UCOL_OFF) { // non French 7803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This loop is a simplified copy of primary loop 7804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // at this point we know that whole strings are latin-1, so we don't 7805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // check for that. We also know that we only have contractions as 7806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // specials. 
7807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sIndex = 0; tIndex = 0; 7808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(sOrder==0) { 7810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIndex==sLen) { 7811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 7812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[sIndex++]; 7815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = elements[sChar]; 7816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder > UCOL_NOT_FOUND) { 7817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, sOrder, source, &sIndex, sLen); 7818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(tOrder==0) { 7822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tIndex==tLen) { 7823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 7824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto endOfSecLoop; 7825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[tIndex++]; 7830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = elements[tChar]; 
7831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder > UCOL_NOT_FOUND) { 7832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, tOrder, target, &tIndex, tLen); 7833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 7836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 7837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 7840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = 0; tOrder = 0; 7841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // see primary loop for comments on this 7844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((sOrder^tOrder)&0xFF000000)!=0) { 7845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder < tOrder) { 7846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 7847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder > tOrder) { 7848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder<<=8; 7852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder<<=8; 7853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
7855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // French 7856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(haveContractions) { // if we have contractions, we have to bail out 7857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // since we don't really know how to handle them here 785850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 7859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // For French, we go backwards 7861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sIndex = sLen; tIndex = tLen; 7862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(sOrder==0) { 7864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIndex==0) { 7865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 7866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[--sIndex]; 7869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = elements[sChar]; 7870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // don't even look for contractions 7871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(tOrder==0) { 7874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tIndex==0) { 7875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 7876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto endOfSecLoop; 7877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
7878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[--tIndex]; 7882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = elements[tChar]; 7883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // don't even look for contractions 7884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 7886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 7887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 7890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = 0; tOrder = 0; 7891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // see the primary loop for comments 7894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((sOrder^tOrder)&0xFF000000)!=0) { 7895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder < tOrder) { 7896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 7897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder > tOrder) { 7898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder<<=8; 7902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
tOrder<<=8; 7903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruendOfSecLoop: 7909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strength >= UCOL_TERTIARY) { 7910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // tertiary loop is the same as secondary (except no French) 7911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elements += coll->latinOneTableLen; 7912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sIndex = 0; tIndex = 0; 7913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = FALSE; 7914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(sOrder==0) { 7916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIndex==sLen) { 7917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 7918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[sIndex++]; 7921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = elements[sChar]; 7922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder > UCOL_NOT_FOUND) { 7923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sOrder, source, &sIndex, sLen); 7924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
while(tOrder==0) { 7927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tIndex==tLen) { 7928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 7929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; // if both strings are at the end, they are equal 7930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[tIndex++]; 7935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = elements[tChar]; 7936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder > UCOL_NOT_FOUND) { 7937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tOrder, target, &tIndex, tLen); 7938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(endOfSource) { 7941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 7942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 7944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = 0; tOrder = 0; 7945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((sOrder^tOrder)&0xff000000)!=0) { 7948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder < tOrder) { 7949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 
7950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder > tOrder) { 7951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 7952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder<<=8; 7955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder<<=8; 7956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_EQUAL; 7960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationResult U_EXPORT2 7964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcollIter( const UCollator *coll, 7965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharIterator *sIter, 7966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharIterator *tIter, 7967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) 7968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 7969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!status || U_FAILURE(*status)) { 7970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 7971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER); 7974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", 
coll, sIter, tIter); 7975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (sIter == tIter) { 7977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) 7978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 7979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIter == NULL || tIter == NULL || coll == NULL) { 7981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 7982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) 7983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 7984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationResult result = UCOL_EQUAL; 7987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Preparing the context objects for iterating over strings 7989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate sColl, tColl; 799050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, NULL, -1, &sColl, status); 799150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, NULL, -1, &tColl, status); 799250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 799350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) 799450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_EQUAL; 799550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 7996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The division for the array length may truncate the array size to 
7997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high 7998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // for all platforms anyway. 7999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 8000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 8001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UNormIterator *sNormIter = NULL, *tNormIter = NULL; 8002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl.iterator = sIter; 8004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl.flags |= UCOL_USE_ITERATOR; 8005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl.flags |= UCOL_USE_ITERATOR; 8006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl.iterator = tIter; 8007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) { 8009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status); 8010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status); 8011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl.flags &= ~UCOL_ITER_NORM; 8012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status); 8014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status); 
8015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl.flags &= ~UCOL_ITER_NORM; 8016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL; 8019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((sChar = sColl.iterator->next(sColl.iterator)) == 8021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (tChar = tColl.iterator->next(tColl.iterator))) { 8022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sChar == U_SENTINEL) { 8023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = UCOL_EQUAL; 8024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto end_compare; 8025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sChar == U_SENTINEL) { 8029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar = tColl.iterator->previous(tColl.iterator); 8030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tChar == U_SENTINEL) { 8033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sChar = sColl.iterator->previous(sColl.iterator); 8034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar = sColl.iterator->previous(sColl.iterator); 8037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar = tColl.iterator->previous(tColl.iterator); 
8038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ucol_unsafeCP((UChar)sChar, coll) || ucol_unsafeCP((UChar)tChar, coll)) 8040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 8041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We are stopped in the middle of a contraction. 8042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Scan backwards through the == part of the string looking for the start of the contraction. 8043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // It doesn't matter which string we scan, since they are the same in this region. 8044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do 8045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 8046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar = sColl.iterator->previous(sColl.iterator); 8047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar = tColl.iterator->previous(tColl.iterator); 8048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll)); 8050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 8054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = ucol_strcollRegular(&sColl, &tColl, status); 8055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruend_compare: 8058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sNormIter || tNormIter) { 
8059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(sNormIter); 8060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(tNormIter); 8061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE_STATUS(result, *status) 8064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 8065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 8069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_strcoll Main public API string comparison function */ 8070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 8071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationResult U_EXPORT2 8072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcoll( const UCollator *coll, 8073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 8075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t targetLength) 8077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 8078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ALIGN_CODE(16); 8079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_ENTRY(UTRACE_UCOL_STRCOLL); 8081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 8082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA3(UTRACE_VERBOSE, 
"coll=%p, source=%p, target=%p", coll, source, target); 8083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength); 8084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength); 8085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(source == NULL || target == NULL) { 8088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // do not crash, but return. Should have 8089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // status argument to return error. 8090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE(UCOL_EQUAL); 8091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Quick check if source and target are same strings. */ 8095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* They should either both be NULL terminated or the explicit length should be set on both. */ 8096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source==target && sourceLength==targetLength) { 8097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE(UCOL_EQUAL); 8098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Scan the strings. 
Find: */ 8102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* The length of any leading portion that is equal */ 8103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Whether they are exactly equal. (in which case we just return) */ 8104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrc = source; 8105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pTarg = target; 8106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t equalLength; 8107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sourceLength == -1 && targetLength == -1) { 8109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Both strings are null terminated. 8110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Scan through any leading equal portion. 8111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (*pSrc == *pTarg && *pSrc != 0) { 8112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc++; 8113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pTarg++; 8114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pSrc == 0 && *pTarg == 0) { 8116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_VALUE(UCOL_EQUAL); 8117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_EQUAL; 8118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 811950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho equalLength = (int32_t)(pSrc - source); 8120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 8122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 8123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // One or both strings has an explicit length. 
8124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrcEnd = source + sourceLength; 8125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pTargEnd = target + targetLength; 8126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan while the strings are bitwise ==, or until one is exhausted. 8128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 8129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (pSrc == pSrcEnd || pTarg == pTargEnd) { 8130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLength == -1)) { 8133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (*pSrc != *pTarg) { 8136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pSrc++; 8139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pTarg++; 8140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 814150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho equalLength = (int32_t)(pSrc - source); 8142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If we made it all the way through both strings, we are done. They are == 8144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((pSrc ==pSrcEnd || (pSrcEnd <pSrc && *pSrc==0)) && /* At end of src string, however it was specified. 
*/ 8145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0))) /* and also at end of dest string */ 8146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 8147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE(UCOL_EQUAL); 8148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (equalLength > 0) { 8152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* There is an identical portion at the beginning of the two strings. */ 8153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* If the identical portion ends within a contraction or a comibining */ 8154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* character sequence, back up to the start of that sequence. */ 8155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // These values should already be set by the code above. 8157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //pSrc = source + equalLength; /* point to the first differing chars */ 8158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //pTarg = target + equalLength; 815927f654740f2a26ad62a5c155af9199af9e69b889claireho if ((pSrc != source+sourceLength && ucol_unsafeCP(*pSrc, coll)) || 816027f654740f2a26ad62a5c155af9199af9e69b889claireho (pTarg != target+targetLength && ucol_unsafeCP(*pTarg, coll))) 8161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 8162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are stopped in the middle of a contraction. 
8163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan backwards through the == part of the string looking for the start of the contraction. 8164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // It doesn't matter which string we scan, since they are the same in this region. 8165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do 8166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 8167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru equalLength--; 8168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc--; 8169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (equalLength>0 && ucol_unsafeCP(*pSrc, coll)); 8171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source += equalLength; 8174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru target += equalLength; 8175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sourceLength > 0) { 8176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sourceLength -= equalLength; 8177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (targetLength > 0) { 8179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targetLength -= equalLength; 8180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 8184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationResult returnVal; 8185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!coll->latinOneUse || (sourceLength > 0 && 
*source&0xff00) || (targetLength > 0 && *target&0xff00)) { 818650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targetLength, &status); 8187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 8188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status); 8189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_VALUE(returnVal); 8191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return returnVal; 8192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */ 8195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 8196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_greater( const UCollator *coll, 8197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 8199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t targetLength) 8201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 8202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 8203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru == UCOL_GREATER); 8204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */ 
8207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 8208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_greaterOrEqual( const UCollator *coll, 8209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 8211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t targetLength) 8213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 8214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 8215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru != UCOL_LESS); 8216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */ 8219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 8220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_equal( const UCollator *coll, 8221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 8223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t targetLength) 8225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 8226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 8227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru == UCOL_EQUAL); 8228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
8230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 8231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getUCAVersion(const UCollator* coll, UVersionInfo info) { 8232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll && coll->UCA) { 8233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo)); 8234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */ 8238