1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Copyright (C) 1996-2010, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: ucol.cpp 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Modification history 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Comments 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 1996-1999 various members of ICU team maintained C API for collation framework 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 02/16/2001 synwee Added internal method getPrevSpecialCE 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/01/2001 synwee Added maxexpansion functionality. 
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/coleitr.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unorm.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_imp.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "bocsu.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unorm_it.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "utracimp.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "putilimp.h" 39c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "uassert.h" 
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LAST_BYTE_MASK_ 0xFF 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SECOND_LAST_BYTE_SHIFT_ 8 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define ZERO_CC_LIMIT_ 0xC0 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// this is static pointer to the normalizer fcdTrieIndex 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// it is always the same between calls to u_cleanup 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and therefore writing to it is not synchronized. 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// It is cleaned in ucol_cleanup 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const uint16_t *fcdTrieIndex=NULL; 59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Code points at fcdHighStart and above have a zero FCD value. 
60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic UChar32 fcdHighStart = 0; 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// These are values from UCA required for 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// implicit generation and supressing sort key compression 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// they should regularly be in the UCA, but if one 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// is running without UCA, it could be a problem 6627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const int32_t maxRegularPrimary = 0x7A; 67c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t minImplicitPrimary = 0xE0; 68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t maxImplicitPrimary = 0xE4; 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool U_CALLCONV 72c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruucol_cleanup(void) 73c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 74c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fcdTrieIndex = NULL; 75c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t U_CALLCONV 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_getFoldingOffset(uint32_t data) { 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(data&0xFFFFFF); 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8527f654740f2a26ad62a5c155af9199af9e69b889claireho// init FCD data 8627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline 8727f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool initializeFCD(UErrorCode *status) { 8827f654740f2a26ad62a5c155af9199af9e69b889claireho if (fcdTrieIndex != NULL) { 8927f654740f2a26ad62a5c155af9199af9e69b889claireho return TRUE; 9027f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 9127f654740f2a26ad62a5c155af9199af9e69b889claireho // The result is constant, until the library is reloaded. 9227f654740f2a26ad62a5c155af9199af9e69b889claireho fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 9327f654740f2a26ad62a5c155af9199af9e69b889claireho ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup); 9427f654740f2a26ad62a5c155af9199af9e69b889claireho return U_SUCCESS(*status); 9527f654740f2a26ad62a5c155af9199af9e69b889claireho } 9627f654740f2a26ad62a5c155af9199af9e69b889claireho} 9727f654740f2a26ad62a5c155af9199af9e69b889claireho 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 99c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruinline void IInit_collIterate(const UCollator *collator, const UChar *sourceString, 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sourceLen, collIterate *s, 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) 102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (s)->string = (s)->pos = sourceString; 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->origFlags = 0; 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->flags = 0; 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sourceLen >= 0) { 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->flags |= UCOL_ITER_HASLEN; 
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->endp = (UChar *)sourceString+sourceLen; 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* change to enable easier checking for end of string for fcdpositon */ 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->endp = NULL; 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->extendCEs = NULL; 115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->extendCEsSize = 0; 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->CEpos = (s)->toReturn = (s)->CEs; 117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->offsetBuffer = NULL; 118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->offsetBufferSize = 0; 119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->offsetReturn = (s)->offsetStore = NULL; 120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->offsetRepeatCount = (s)->offsetRepeatValue = 0; 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->coll = (collator); 12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (s)->nfd = Normalizer2Factory::getNFDInstance(*status); 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->fcdPosition = 0; 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(collator->normalizationMode == UCOL_ON) { 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->flags |= UCOL_ITER_NORM; 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) { 128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (s)->flags |= UCOL_HIRAGANA_Q; 
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (s)->iterator = NULL; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //(s)->iteratorIndex = 0; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_init_collIterate(const UCollator *collator, const UChar *sourceString, 13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sourceLen, collIterate *s, 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Out-of-line version for use from other files. */ 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(collator, sourceString, sourceLen, s, status); 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI collIterate * U_EXPORT2 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_new_collIterate(UErrorCode *status) { 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collIterate *s = new collIterate; 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(s == NULL) { 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_MEMORY_ALLOCATION_ERROR; 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return s; 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI void U_EXPORT2 15650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_delete_collIterate(collIterate *s) { 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete s; 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UBool U_EXPORT2 16150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_collIterateAtEnd(collIterate *s) { 16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return s == NULL || s->pos == s->endp; 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Backup the state of the collIterate struct data 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate to backup 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param backup storage 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void backupState(const collIterate *data, collIterateState *backup) 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->fcdPosition = data->fcdPosition; 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->flags = data->flags; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->origFlags = data->origFlags; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->pos = data->pos; 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backup->bufferaddress = data->writableBuffer.getBuffer(); 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backup->buffersize = 
data->writableBuffer.length(); 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->iteratorMove = 0; 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->iteratorIndex = 0; 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->iterator != NULL) { 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER_CURRENT); 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->iteratorIndex = data->iterator->getState(data->iterator); 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // no we try to fixup if we're using a normalizing iterator and we get UITER_NO_STATE 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(backup->iteratorIndex == UITER_NO_STATE) { 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((backup->iteratorIndex = data->iterator->getState(data->iterator)) == UITER_NO_STATE) { 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backup->iteratorMove++; 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->move(data->iterator, -1, UITER_CURRENT); 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT); 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Loads the state into the collIterate struct data 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate to backup 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 
@param backup storage 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param forwards boolean to indicate if forwards iteration is used, 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* false indicates backwards iteration 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void loadState(collIterate *data, const collIterateState *backup, 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool forwards) 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags = backup->flags; 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->origFlags = backup->origFlags; 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->iterator != NULL) { 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO); 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->setState(data->iterator, backup->iteratorIndex, &status); 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(backup->iteratorMove != 0) { 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT); 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = backup->pos; 217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & UCOL_ITER_INNORMBUF) && 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
data->writableBuffer.getBuffer() != backup->bufferaddress) { 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this is when a new buffer has been reallocated and we'll have to 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru calculate the new position. 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru note the new buffer has to contain the contents of the old buffer. 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forwards) { 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.getTerminatedBuffer() + 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (data->pos - backup->bufferaddress); 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* backwards direction */ 23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t temp = backup->buffersize - 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int32_t)(data->pos - backup->bufferaddress); 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writableBuffer.length() - temp); 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this is alittle tricky. 
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if we are initially not in the normalization buffer, even if we 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalize in the later stage, the data in the buffer will be 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ignored, since we skip back up to the data string. 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru however if we are already in the normalization buffer, any 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru further normalization will pull data into the normalization 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer and modify the fcdPosition. 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru since we are keeping the data in the buffer for use, the 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fcdPosition can not be reverted back. 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru arrgghh.... 
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition = backup->fcdPosition; 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoreallocCEs(collIterate *data, int32_t newCapacity) { 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t *oldCEs = data->extendCEs; 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(oldCEs == NULL) { 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCEs = data->CEs; 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length = data->CEpos - oldCEs; 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t *newCEs = (uint32_t *)uprv_malloc(newCapacity * 4); 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(newCEs == NULL) { 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_memcpy(newCEs, oldCEs, length * 4); 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(data->extendCEs); 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->extendCEs = newCEs; 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->extendCEsSize = newCapacity; 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->CEpos = newCEs + length; 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoincreaseCEsCapacity(collIterate *data) { 
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldCapacity; 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(data->extendCEs != NULL) { 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity = data->extendCEsSize; 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity = LENGTHOF(data->CEs); 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return reallocCEs(data, 2 * oldCapacity); 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoensureCEsCapacity(collIterate *data, int32_t minCapacity) { 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldCapacity; 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(data->extendCEs != NULL) { 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity = data->extendCEsSize; 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity = LENGTHOF(data->CEs); 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(minCapacity <= oldCapacity) { 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldCapacity *= 2; 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return reallocCEs(data, minCapacity > oldCapacity ? 
minCapacity : oldCapacity); 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29827f654740f2a26ad62a5c155af9199af9e69b889clairehovoid collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) { 29927f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_FAILURE(errorCode)) { 30027f654740f2a26ad62a5c155af9199af9e69b889claireho return; 30127f654740f2a26ad62a5c155af9199af9e69b889claireho } 30227f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t length = offsetStore == NULL ? 0 : (int32_t)(offsetStore - offsetBuffer); 30327f654740f2a26ad62a5c155af9199af9e69b889claireho if(length >= offsetBufferSize) { 30427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t newCapacity = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE; 30527f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t *newBuffer = reinterpret_cast<int32_t *>(uprv_malloc(newCapacity * 4)); 30627f654740f2a26ad62a5c155af9199af9e69b889claireho if(newBuffer == NULL) { 30727f654740f2a26ad62a5c155af9199af9e69b889claireho errorCode = U_MEMORY_ALLOCATION_ERROR; 30827f654740f2a26ad62a5c155af9199af9e69b889claireho return; 30927f654740f2a26ad62a5c155af9199af9e69b889claireho } 31027f654740f2a26ad62a5c155af9199af9e69b889claireho if(length > 0) { 31127f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_memcpy(newBuffer, offsetBuffer, length * 4); 31227f654740f2a26ad62a5c155af9199af9e69b889claireho } 31327f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(offsetBuffer); 31427f654740f2a26ad62a5c155af9199af9e69b889claireho offsetBuffer = newBuffer; 31527f654740f2a26ad62a5c155af9199af9e69b889claireho offsetStore = offsetBuffer + length; 31627f654740f2a26ad62a5c155af9199af9e69b889claireho offsetBufferSize = newCapacity; 31727f654740f2a26ad62a5c155af9199af9e69b889claireho } 31827f654740f2a26ad62a5c155af9199af9e69b889claireho *offsetStore++ = offset; 31927f654740f2a26ad62a5c155af9199af9e69b889claireho} 
32027f654740f2a26ad62a5c155af9199af9e69b889claireho 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collIter_eos() 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Checks for a collIterate being positioned at the end of 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* its source string. 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_eos(collIterate *s) { 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s->flags & UCOL_USE_ITERATOR) { 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !(s->iterator->hasNext(s->iterator)); 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((s->flags & UCOL_ITER_HASLEN) == 0 && *s->pos != 0) { 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Null terminated string, but not at null, so not at end. 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Whether in main or normalization buffer doesn't matter. 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // String with length. Can't be in normalization buffer, which is always 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // null termintated. 
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s->flags & UCOL_ITER_HASLEN) { 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (s->pos == s->endp); 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are at a null termination, could be either normalization buffer or main string. 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((s->flags & UCOL_ITER_INNORMBUF) == 0) { 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At null at end of main string. 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At null at end of normalization buffer. Need to check whether there there are 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // any characters left in the main buffer. 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s->origFlags & UCOL_USE_ITERATOR) { 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !(s->iterator->hasNext(s->iterator)); 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if ((s->origFlags & UCOL_ITER_HASLEN) == 0) { 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Null terminated main string. fcdPosition is the 'return' position into main buf. 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (*s->fcdPosition == 0); 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Main string with an end pointer. 
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return s->fcdPosition == s->endp; 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collIter_bos() 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Checks for a collIterate being positioned at the start of 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* its source string. 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_bos(collIterate *source) { 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we're going backwards, we need to know whether there is more in the 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // iterator, even if we are in the side buffer 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) { 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !source->iterator->hasPrevious(source->iterator); 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (source->pos <= source->string || 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((source->flags & UCOL_ITER_INNORMBUF) && 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(source->pos - 1) == 0 && source->fcdPosition == NULL)) { 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_SimpleBos(collIterate *source) { 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we're going backwards, we need to know whether there is more in the 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // iterator, even if we are in the side buffer 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) { 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !source->iterator->hasPrevious(source->iterator); 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (source->pos == source->string) { 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/ 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //return (data->pos == data->string) || 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the open/close functions */ 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru/****************************************************************************/ 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UCollator* 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_initFromBinary(const uint8_t *bin, int32_t length, 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *base, 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *fillIn, 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *result = fillIn; 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(base == NULL) { 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we don't support null base yet 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We need these and we could be running without UCA 423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_initImplicitConstants(status); 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCATableHeader *colData = (UCATableHeader *)bin; 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do we want version check here? 
We're trying to figure out whether collators are compatible 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 || 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0)) || 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru colData->version[0] != UCOL_BUILDER_VERSION) 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_COLLATOR_VERSION_MISMATCH; 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) { 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ucol_initCollator((const UCATableHeader *)bin, result, base, status); 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)){ 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hasRealData = TRUE; 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(base) { 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ucol_initCollator(base->image, result, base, status); 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+((const UCATableHeader *)bin)->options), status); 
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)){ 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hasRealData = FALSE; 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_USELESS_COLLATOR_ERROR; 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeImageOnClose = FALSE; 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->actualLocale = NULL; 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->validLocale = NULL; 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->requestedLocale = NULL; 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->rules = NULL; 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->rulesLength = 0; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeRulesOnClose = FALSE; 463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->ucaRules = NULL; 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollator* U_EXPORT2 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_openBinary(const uint8_t *bin, int32_t length, 
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *base, 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ucol_initFromBinary(bin, length, base, NULL, status); 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruucol_cloneBinary(const UCollator *coll, 477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t *buffer, int32_t capacity, 478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) 479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t length = 0; 481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return length; 483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(capacity < 0) { 485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return length; 487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->hasRealData == TRUE) { 489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru length = coll->image->size; 490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(length <= capacity) { 491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(buffer, coll->image, length); 492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
else { 493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet))); 497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(length <= capacity) { 498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* build the UCATableHeader with minimal entries */ 499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* do not copy the header from the UCA file because its values are wrong! */ 500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */ 501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* reset everything */ 503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memset(buffer, 0, length); 504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* set the tailoring-specific values */ 506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCATableHeader *myData = (UCATableHeader *)buffer; 507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->size = length; 508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* offset for the options, the only part of the data that is present after the header */ 510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->options = sizeof(UCATableHeader); 511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* need to always set the expansion value for an upper bound of the options */ 
513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->expansion = myData->options + sizeof(UColOptionSet); 514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->magic = UCOL_HEADER_MAGIC; 516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->isBigEndian = U_IS_BIG_ENDIAN; 517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->charSetFamily = U_CHARSET_FAMILY; 518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* copy UCA's version; genrb will override all but the builder version with tailoring data */ 520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo)); 521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo)); 523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo)); 524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo)); 525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru myData->jamoSpecial = coll->image->jamoSpecial; 526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* copy the collator options */ 528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet)); 529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru } 532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return length; 534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollator* U_EXPORT2 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t * pBufferSize, UErrorCode *status) 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator * localCollator; 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator); 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *stackBufferChars = (char *)stackBuffer; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t imageSize = 0; 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rulesSize = 0; 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rulesPadding = 0; 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *image; 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *rules; 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool colAllocated = FALSE; 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool imageAllocated = FALSE; 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status == NULL || U_FAILURE(*status)){ 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((stackBuffer && !pBufferSize) || !coll){ 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = 
U_ILLEGAL_ARGUMENT_ERROR; 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coll->rules && coll->freeRulesOnClose) { 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar); 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar)); 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferSizeNeeded += rulesSize + rulesPadding; 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stackBuffer && *pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pBufferSize = bufferSizeNeeded; 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Pointers on 64-bit platforms need to be aligned 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * on a 64-bit boundry in memory. 
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pBufferSize > offsetUp) { 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pBufferSize -= offsetUp; 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stackBufferChars += offsetUp; 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pBufferSize = 1; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stackBuffer = (void *)stackBufferChars; 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stackBuffer == NULL || *pBufferSize < bufferSizeNeeded) { 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate one here...*/ 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded); 587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Null pointer check. 
588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (stackBufferChars == NULL) { 589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru colAllocated = TRUE; 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(*status)) { 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_SAFECLONE_ALLOCATED_WARNING; 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator = (UCollator *)stackBufferChars; 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding); 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode tempStatus = U_ZERO_ERROR; 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus); 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coll->freeImageOnClose) { 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru image = (uint8_t *)uprv_malloc(imageSize); 605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Null pointer check 606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (image == NULL) { 607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_cloneBinary(coll, image, imageSize, status); 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru imageAllocated = TRUE; 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru image = (uint8_t *)coll->image; 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollator, status); 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coll->rules) { 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coll->freeRulesOnClose) { 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->rules = u_strcpy(rules, coll->rules); 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //bufferEnd += rulesSize; 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->rules = coll->rules; 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->freeRulesOnClose = FALSE; 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->rulesLength = coll->rulesLength; 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(coll, (UColAttribute)i, status), status); 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // zero copies of pointers 638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru localCollator->actualLocale = NULL; 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->validLocale = NULL; 640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru localCollator->requestedLocale = NULL; 641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru localCollator->ucaRules = coll->ucaRules; // There should only be one copy here. 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->freeOnClose = colAllocated; 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localCollator->freeImageOnClose = imageAllocated; 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return localCollator; 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_close(UCollator *coll) 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE); 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll); 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll != NULL) { 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // these are always 
owned by each UCollator struct, 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so we always free them 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->validLocale != NULL) { 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->validLocale); 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->actualLocale != NULL) { 659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(coll->actualLocale); 660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->requestedLocale != NULL) { 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->requestedLocale); 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->latinOneCEs != NULL) { 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->latinOneCEs); 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->options != NULL && coll->freeOptionsOnClose) { 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->options); 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->rules != NULL && coll->freeRulesOnClose) { 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free((UChar *)coll->rules); 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->image != NULL && coll->freeImageOnClose) { 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free((UCATableHeader *)coll->image); 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
67627f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->leadBytePermutationTable != NULL) { 67727f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(coll->leadBytePermutationTable); 67827f654740f2a26ad62a5c155af9199af9e69b889claireho } 67927f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->reorderCodes != NULL) { 68027f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(coll->reorderCodes); 68127f654740f2a26ad62a5c155af9199af9e69b889claireho } 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Here, it would be advisable to close: */ 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* - UData for UCA (unless we stuff it in the root resb */ 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Again, do we need additional housekeeping... HMMM! */ 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose); 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->freeOnClose){ 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* for safeClone, if freeOnClose is FALSE, 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru don't free the other instance data */ 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll); 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT(); 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This one is currently used by genrb & tests. 
After constructing from rules (tailoring),*/
/* you should be able to get the binary chunk to write out...  Doesn't look very full now */
/**
 * Copies the binary data of a collator into a freshly allocated buffer.
 *
 * If the collator has real data, the whole image (coll->image->size bytes) is
 * copied. Otherwise a minimal image is built: a zeroed UCATableHeader carrying
 * the size, magic, endianness, charset family, version fields and jamoSpecial
 * flag, followed by a copy of the collator's option set.
 *
 * @param coll   collator whose data is cloned; must not be NULL
 * @param length receives the size in bytes of the returned buffer; must not be NULL
 * @param status error code; set to U_ILLEGAL_ARGUMENT_ERROR for NULL arguments and
 *               to U_MEMORY_ALLOCATION_ERROR if the buffer cannot be allocated
 * @return buffer obtained from uprv_malloc(), or NULL on failure
 *         (presumably released by the caller with uprv_free() - confirm against callers)
 */
U_CFUNC uint8_t* U_EXPORT2
ucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status)
{
    uint8_t *result = NULL;
    if(status == NULL || U_FAILURE(*status)) {
        return NULL;
    }
    if(coll == NULL || length == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    if(coll->hasRealData == TRUE) {
        *length = coll->image->size;
        result = (uint8_t *)uprv_malloc(*length);
        /* test for NULL */
        if (result == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        uprv_memcpy(result, coll->image, *length);
    } else {
        /* The option set is stored right after the padded header. The same offset */
        /* is used for the header field and for the copy below so they cannot disagree. */
        const int32_t optionsOffset = (int32_t)paddedsize(sizeof(UCATableHeader));

        *length = (int32_t)(optionsOffset+paddedsize(sizeof(UColOptionSet)));
        result = (uint8_t *)uprv_malloc(*length);
        /* test for NULL */
        if (result == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        /* build the UCATableHeader with minimal entries */
        /* do not copy the header from the UCA file because its values are wrong! */
        /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */

        /* reset everything */
        uprv_memset(result, 0, *length);

        /* set the tailoring-specific values */
        UCATableHeader *myData = (UCATableHeader *)result;
        myData->size = *length;

        /* offset for the options, the only part of the data that is present after the header */
        myData->options = optionsOffset;

        /* need to always set the expansion value for an upper bound of the options */
        myData->expansion = myData->options + sizeof(UColOptionSet);

        myData->magic = UCOL_HEADER_MAGIC;
        myData->isBigEndian = U_IS_BIG_ENDIAN;
        myData->charSetFamily = U_CHARSET_FAMILY;

        /* copy UCA's version; genrb will override all but the builder version with tailoring data */
        uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));

        uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
        uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
        uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
        myData->jamoSpecial = coll->image->jamoSpecial;

        /* copy the collator options */
        uprv_memcpy(result+optionsOffset, coll->options, sizeof(UColOptionSet));
    }
    return result;
}
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) { 759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->caseFirst = (UColAttributeValue)opts->caseFirst; 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->caseLevel = (UColAttributeValue)opts->caseLevel; 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->frenchCollation = (UColAttributeValue)opts->frenchCollation; 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->normalizationMode = (UColAttributeValue)opts->normalizationMode; 76627f654740f2a26ad62a5c155af9199af9e69b889claireho if(result->normalizationMode == UCOL_ON && !initializeFCD(status)) { 76727f654740f2a26ad62a5c155af9199af9e69b889claireho return; 76827f654740f2a26ad62a5c155af9199af9e69b889claireho } 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->strength = (UColAttributeValue)opts->strength; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->variableTopValue = opts->variableTopValue; 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->alternateHandling = (UColAttributeValue)opts->alternateHandling; 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ; 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->numericCollation = (UColAttributeValue)opts->numericCollation; 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->caseFirstisDefault = TRUE; 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->caseLevelisDefault = 
TRUE; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->frenchCollationisDefault = TRUE; 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->normalizationModeisDefault = TRUE; 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->strengthisDefault = TRUE; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->variableTopValueisDefault = TRUE; 78027f654740f2a26ad62a5c155af9199af9e69b889claireho result->alternateHandlingisDefault = TRUE; 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hiraganaQisDefault = TRUE; 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->numericCollationisDefault = TRUE; 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_updateInternalState(result, status); 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->options = opts; 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Approximate determination if a character is at a contraction end. 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Guaranteed to be TRUE if a character is at the end of a contraction, 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* otherwise it is not deterministic. 
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param c character to be determined 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll collator 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) { 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < coll->minContrEndCP) { 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t hash = c; 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t htbyte; 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { 806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_TRAIL(c)) { 807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru htbyte = coll->contrEndCP[hash>>3]; 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (((htbyte >> (hash & 7)) & 1) == 1); 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru* i_getCombiningClass() 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* A fast, at least partly inline version of u_getCombiningClass() 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This is a candidate for further optimization. Used heavily 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* in contraction processing. 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) { 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t sCC = 0; 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) { 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sCC = u_getCombiningClass(c); 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return sCC; 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status) { 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *result = fillIn; 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status) || image == NULL) { 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result == NULL) { 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru result = (UCollator *)uprv_malloc(sizeof(UCollator)); 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result == NULL) { 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeOnClose = TRUE; 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeOnClose = FALSE; 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->image = image; 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->mapping.getFoldingOffset = _getFoldingOffset; 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosition; 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status); 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result->freeOnClose == TRUE) { 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(result); 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = NULL; 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping); 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->contractionCEs); 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex); 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion); 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->rules = NULL; 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->rulesLength = 0; 868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->freeRulesOnClose = FALSE; 86927f654740f2a26ad62a5c155af9199af9e69b889claireho result->reorderCodes = NULL; 87027f654740f2a26ad62a5c155af9199af9e69b889claireho result->reorderCodesLength = 0; 87127f654740f2a26ad62a5c155af9199af9e69b889claireho result->leadBytePermutationTable = NULL; 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get the version info from UCATableHeader and populate the Collator struct*/ 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/ 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/ 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->dataVersion[2] = 0; 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->dataVersion[3] = 0; 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP; 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
result->minUnsafeCP = 0; 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (c=0; c<0x300; c++) { // Find the smallest unsafe char. 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ucol_unsafeCP(c, result)) break; 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->minUnsafeCP = c; 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP; 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->minContrEndCP = 0; 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (c=0; c<0x300; c++) { // Find the Contraction-ending char. 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ucol_contractionEndCP(c, result)) break; 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->minContrEndCP = c; 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* max expansion tables */ 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->endExpansionCE = (uint32_t*)((uint8_t*)result->image + 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->image->endExpansionCE); 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->lastEndExpansionCE = result->endExpansionCE + 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->image->endExpansionCECount - 1; 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->expansionCESize = (uint8_t*)result->image + 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->image->expansionCESize; 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //result->errorCode = *status; 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->latinOneCEs = NULL; 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->latinOneRegenTable = FALSE; 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->latinOneFailed = FALSE; 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->UCA = UCA; 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Normally these will be set correctly later. This is the default if you use UCA or the default. */ 911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->ucaRules = NULL; 912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result->actualLocale = NULL; 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->validLocale = NULL; 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->requestedLocale = NULL; 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->hasRealData = FALSE; // real data lives in .dat file... 
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result->freeImageOnClose = FALSE; 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 91827f654740f2a26ad62a5c155af9199af9e69b889claireho /* set attributes */ 91927f654740f2a26ad62a5c155af9199af9e69b889claireho ucol_setOptionsFromHeader( 92027f654740f2a26ad62a5c155af9199af9e69b889claireho result, 92127f654740f2a26ad62a5c155af9199af9e69b889claireho (UColOptionSet*)((uint8_t*)result->image+result->image->options), 92227f654740f2a26ad62a5c155af9199af9e69b889claireho status); 92327f654740f2a26ad62a5c155af9199af9e69b889claireho result->freeOptionsOnClose = FALSE; 92427f654740f2a26ad62a5c155af9199af9e69b889claireho 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* new Mark's code */ 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For generation of Implicit CEs 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @author Davis 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Cleaned up so that changes can be made more easily. 
 * Old values:
# First Implicit: E26A792D
# Last Implicit: E3DC70C0
# First CJK: E0030300
# Last CJK: E0A9DD00
# First CJK_A: E0A9DF00
# Last CJK_A: E0DE3100
 */
/* Following is a port of Mark's code for new treatment of implicits.
 * It is positioned here, since ucol_initUCA needs to initialize the
 * variables below according to the data in the fractional UCA.
 */

/**
 * Function used to:
 * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
 * b) bump any non-CJK characters by 0x110000 (NON_CJK_OFFSET).
 * The relevant blocks are:
 * A:    4E00..9FFF; CJK Unified Ideographs
 *       F900..FAFF; CJK Compatibility Ideographs
 * B:    3400..4DBF; CJK Unified Ideographs Extension A
 *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
 * As long as
 *   no new B characters are allocated between 4E00 and FAFF, and
 *   no new A characters are outside of this range,
 * (very high probability) this simple code will work.
 * The reordered blocks are:
 * Block1 is CJK
 * Block2 is CJK_COMPAT_USED
 * Block3 is CJK_A
 * (all contiguous)
 * Any other CJK gets its normal code point
 * Any non-CJK gets +0x110000
 * When we reorder Block1, we make sure that it is at the very start,
 * so that it will use a 3-byte form.
970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Warning: the we only pick up the compatibility characters that are 971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * NOT decomposed, so that block is smaller! 972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// CONSTANTS 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NON_CJK_OFFSET = 0x110000, 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 980b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Precomputed by initImplicitConstants() 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final3Multiplier = 0, 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final4Multiplier = 0, 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final3Count = 0, 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final4Count = 0, 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru medialCount = 0, 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min3Primary = 0, 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min4Primary = 0, 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max4Primary = 0, 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru minTrail = 0, 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxTrail = 0, 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max3Trail = 0, 
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max4Trail = 0, 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min4Boundary = 0; 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32 99827f654740f2a26ad62a5c155af9199af9e69b889claireho // 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; 99927f654740f2a26ad62a5c155af9199af9e69b889claireho // 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_BASE = 0x4E00, 100127f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_LIMIT = 0x9FCB+1, 100227f654740f2a26ad62a5c155af9199af9e69b889claireho // Unified CJK ideographs in the compatibility ideographs block. 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_COMPAT_USED_BASE = 0xFA0E, 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_COMPAT_USED_LIMIT = 0xFA2F+1, 100527f654740f2a26ad62a5c155af9199af9e69b889claireho // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;; 100627f654740f2a26ad62a5c155af9199af9e69b889claireho // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;; 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_A_BASE = 0x3400, 100827f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_A_LIMIT = 0x4DB5+1, 100927f654740f2a26ad62a5c155af9199af9e69b889claireho // 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;; 101027f654740f2a26ad62a5c155af9199af9e69b889claireho // 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;; 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CJK_B_BASE = 0x20000, 101227f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_B_LIMIT = 0x2A6D6+1, 101327f654740f2a26ad62a5c155af9199af9e69b889claireho // 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;; 101427f654740f2a26ad62a5c155af9199af9e69b889claireho // 2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;; 
101527f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_C_BASE = 0x2A700, 101627f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_C_LIMIT = 0x2B734+1, 101727f654740f2a26ad62a5c155af9199af9e69b889claireho // 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;; 101827f654740f2a26ad62a5c155af9199af9e69b889claireho // 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;; 101927f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_D_BASE = 0x2B740, 102027f654740f2a26ad62a5c155af9199af9e69b889claireho CJK_D_LIMIT = 0x2B81D+1; 102127f654740f2a26ad62a5c155af9199af9e69b889claireho // when adding to this list, look for all occurrences (in project) 102227f654740f2a26ad62a5c155af9199af9e69b889claireho // of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing!!!! 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 swapCJK(UChar32 i) { 102527f654740f2a26ad62a5c155af9199af9e69b889claireho if (i < CJK_A_BASE) { 102627f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 102727f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_A_LIMIT) { 102827f654740f2a26ad62a5c155af9199af9e69b889claireho // Extension A has lower code points than the original Unihan+compat 102927f654740f2a26ad62a5c155af9199af9e69b889claireho // but sorts higher. 
103027f654740f2a26ad62a5c155af9199af9e69b889claireho return i - CJK_A_BASE 103127f654740f2a26ad62a5c155af9199af9e69b889claireho + (CJK_LIMIT - CJK_BASE) 103227f654740f2a26ad62a5c155af9199af9e69b889claireho + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE); 103327f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_BASE) { 103427f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 103527f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_LIMIT) { 103627f654740f2a26ad62a5c155af9199af9e69b889claireho return i - CJK_BASE; 103727f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_COMPAT_USED_BASE) { 103827f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 103927f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_COMPAT_USED_LIMIT) { 104027f654740f2a26ad62a5c155af9199af9e69b889claireho return i - CJK_COMPAT_USED_BASE 104127f654740f2a26ad62a5c155af9199af9e69b889claireho + (CJK_LIMIT - CJK_BASE); 104227f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_B_BASE) { 104327f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 104427f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_B_LIMIT) { 104527f654740f2a26ad62a5c155af9199af9e69b889claireho return i; // non-BMP-CJK 104627f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_C_BASE) { 104727f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 104827f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_C_LIMIT) { 104927f654740f2a26ad62a5c155af9199af9e69b889claireho return i; // non-BMP-CJK 105027f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_D_BASE) { 105127f654740f2a26ad62a5c155af9199af9e69b889claireho // non-CJK 105227f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (i < CJK_D_LIMIT) { 105327f654740f2a26ad62a5c155af9199af9e69b889claireho return i; // non-BMP-CJK 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return i + NON_CJK_OFFSET; // non-CJK 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getRawFromCodePoint(UChar32 i) { 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return swapCJK(i)+1; 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getCodePointFromRaw(UChar32 i) { 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i--; 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 result = 0; 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i >= NON_CJK_OFFSET) { 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i - NON_CJK_OFFSET; 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(i >= CJK_B_BASE) { 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i; 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i < CJK_LIMIT - CJK_BASE) { 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i + CJK_BASE; 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE); 
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE); 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = -1; 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// GET IMPLICIT PRIMARY WEIGHTS 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Return value is left justified primary key 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getImplicitFromRaw(UChar32 cp) { 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0 || cp > UCOL_MAX_INPUT) { 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp)); 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last0 = cp - min4Boundary; 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (last0 < 0) { 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last1 = cp / final3Count; 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last0 = cp % final3Count; 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last2 = last1 / medialCount; 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last1 %= medialCount; 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last1 = minTrail + last1; // offset 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last2 = min3Primary + last2; // offset 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (last2 >= min4Primary) { 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2)); 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (last2 << 24) + (last1 << 16) + (last0 << 8); 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last1 = last0 / final4Count; 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last0 %= final4Count; 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last2 = last1 / medialCount; 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last1 %= medialCount; 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t last3 = last2 / medialCount; 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last2 %= medialCount; 
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last1 = minTrail + last1; // offset 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last2 = minTrail + last2; // offset 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last3 = min4Primary + last3; // offset 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (last3 > max4Primary) { 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3)); 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0; 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t U_EXPORT2 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getImplicitPrimary(UChar32 cp) { 113627f654740f2a26ad62a5c155af9199af9e69b889claireho //fprintf(stdout, "Incoming: %04x\n", cp); 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp)); 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp = swapCJK(cp); 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp++; 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we 
now have a range of numbers from 0 to 21FFFF. 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp)); 114427f654740f2a26ad62a5c155af9199af9e69b889claireho //fprintf(stdout, "CJK swapped: %04x\n", cp); 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return uprv_uca_getImplicitFromRaw(cp); 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Converts implicit CE into raw integer ("code point") 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param implicit 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return -1 if illegal format 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getRawFromImplicit(uint32_t implicit) { 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 result; 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b3 = implicit & 0xFF; 1158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 b2 = (implicit >> 8) & 0xFF; 1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 b1 = (implicit >> 16) & 0xFF; 1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 b0 = (implicit >> 24) & 0xFF; 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // simple parameter checks 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b0 < min3Primary || b0 > 
max4Primary 1164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || b1 < minTrail || b1 > maxTrail) 1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // normal offsets 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b1 -= minTrail; 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // take care of the final values, and compose 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b0 < min4Primary) { 1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (b2 < minTrail || b2 > max3Trail || b3 != 0) 1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b2 -= minTrail; 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 remainder = b2 % final3Multiplier; 1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (remainder != 0) 1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b0 -= min3Primary; 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b2 /= final3Multiplier; 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ((b0 * medialCount) + b1) * final3Count + b2; 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (b2 < minTrail || b2 > maxTrail 1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || b3 < minTrail || b3 > max4Trail) 1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b2 -= minTrail; 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b3 -= minTrail; 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru UChar32 remainder = b3 % final4Multiplier; 1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (remainder != 0) 1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b3 /= final4Multiplier; 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b0 -= min4Primary; 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary; 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // final check 1194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (result < 0 || result > UCOL_MAX_INPUT) 1195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t divideAndRoundUp(int a, int b) { 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1 + (a-1)/b; 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this function is either called from initUCA or from genUCA before 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * doing canonical closure for the UCA. 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set up to generate implicits. 
1210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Maintenance Note: this function may end up being called more than once, due 1211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * to threading races during initialization. Make sure that 1212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * none of the Constants is ever transiently assigned an 1213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * incorrect value. 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param minPrimary 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param maxPrimary 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param minTrail final byte 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param maxTrail final byte 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param gap3 the gap we leave for tailoring for 3-byte forms 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param gap4 the gap we leave for tailoring for 4-byte forms 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void initImplicitConstants(int minPrimary, int maxPrimary, 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int minTrailIn, int maxTrailIn, 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int gap3, int primaries3count, 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // some simple parameter checks 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) 1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF) 1228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (primaries3count < 1)) 
1229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru minTrail = minTrailIn; 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxTrail = maxTrailIn; 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min3Primary = minPrimary; 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max4Primary = maxPrimary; 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // compute constants for use later. 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // number of values we can use in trailing bytes 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // leave room for empty values between AND above, e.g. 
if gap = 2 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // range 3..7 => +3 -4 -5 -6 -7: so 1 value 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final3Multiplier = gap3 + 1; 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final3Count = (maxTrail - minTrail + 1) / final3Multiplier; 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max3Trail = minTrail + (final3Count - 1) * final3Multiplier; 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // medials can use full range 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru medialCount = (maxTrail - minTrail + 1); 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find out how many values fit in each form 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t threeByteCount = medialCount * final3Count; 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // now determine where the 3/4 boundary is. 
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we use 3 bytes below the boundary, and 4 above 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t primariesAvailable = maxPrimary - minPrimary + 1; 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t primaries4count = primariesAvailable - primaries3count; 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t min3ByteCoverage = primaries3count * threeByteCount; 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min4Primary = minPrimary + primaries3count; 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min4Boundary = min3ByteCoverage; 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now expand out the multiplier for the 4 bytes, and redo. 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary; 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count); 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount); 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte; 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (gap4 < 1) { 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final4Multiplier = gap4 + 1; 
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru final4Count = neededPerFinalByte; 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru max4Trail = minTrail + (final4Count - 1) * final4Multiplier; 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Supply parameters for generating implicit CEs 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruuprv_uca_initImplicitConstants(UErrorCode *status) { 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms. 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status); 1284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status); 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* collIterNormalize Incremental Normalization happens here. */ 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* pick up the range of chars identifed by FCD, */ 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* normalize it into the collIterate's writable buffer, */ 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* switch the collIterate's state to use the writable buffer. 
*/ 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid collIterNormalize(collIterate *collationSource) 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *srcP = collationSource->pos - 1; /* Start of chars to normalize */ 129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *endP = collationSource->fcdPosition; /* End of region to normalize+1 */ 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - srcP)), 130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collationSource->writableBuffer, 130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status); 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_errorName(status)); 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collationSource->pos = collationSource->writableBuffer.getTerminatedBuffer(); 1311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->origFlags = collationSource->flags; 1312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->flags |= UCOL_ITER_INNORMBUF; 
1313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR); 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This function takes the iterator and extracts normalized stuff up to the next boundary 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// It is similar in the end results to the collIterNormalize, but for the cases when we 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// use an iterator 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizeIterator(collIterate *collationSource) { 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasNormalized = FALSE; 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->iterator, UITER_CURRENT); 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t iterIndex = collationSource->iterator->getState(collationSource->iterator); 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer, 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status); 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(status == U_BUFFER_OVERFLOW_ERROR || normLen == (int32_t)collationSource->writableBufSize) { 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reallocate and terminate 
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!u_growBufferFromStatic(collationSource->stackWritableBuffer, 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &collationSource->writableBuffer, 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int32_t *)&collationSource->writableBufSize, normLen + 1, 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0) 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #ifdef UCOL_DEBUG 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "normalizeIterator(), out of memory\n"); 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #endif 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //collationSource->iterator->move(collationSource->iterator, iterIndex, UITER_ZERO); 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->iterator->setState(collationSource->iterator, iterIndex, &status); 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer, 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status); 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Terminate the buffer - we already checked that it is big enough 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->writableBuffer[normLen] = 0; 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(collationSource->writableBuffer != 
collationSource->stackWritableBuffer) { 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->flags |= UCOL_ITER_ALLOCATED; 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->pos = collationSource->writableBuffer; 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->origFlags = collationSource->flags; 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->flags |= UCOL_ITER_INNORMBUF; 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR); 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/ 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Incremental FCD check and normalize */ 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Called from getNextCE when normalization state is suspect. */ 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* When entering, the state is known to be this: */ 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* o We are working in the main buffer of the collIterate, not the side */ 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* writable buffer. When in the side buffer, normalization mode is always off, */ 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* so we won't get here. */ 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* o The leading combining class from the current character is 0 or */ 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* the trailing combining class of the previous char was zero. 
*/ 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* True because the previous call to this function will have always exited */ 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* that way, and we get called for every char where cc might be non-zero. */ 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIterFCD(collIterate *collationSource) { 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcP, *endP; 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadingCC; 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t prevTrailingCC = 0; 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t fcd; 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool needNormalize = FALSE; 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcP = collationSource->pos-1; 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collationSource->flags & UCOL_ITER_HASLEN) { 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru endP = collationSource->endp; 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru endP = NULL; 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get the trailing combining class of the current character. If it's zero, 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we are OK. 
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trie access */ 1387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP); 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fcd != 0) { 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevTrailingCC != 0) { 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The current char has a non-zero trailing CC. Scan forward until we find 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a char with a leading cc of zero. 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (endP == NULL || srcP != endP) 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *savedSrcP = srcP; 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trie access */ 1399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP); 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (leadingCC == 0) { 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcP = savedSrcP; // Hit char that is not part of combining sequence. 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // back up over it. (Could be surrogate pair!) 
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (leadingCC < prevTrailingCC) { 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru needNormalize = TRUE; 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->fcdPosition = (UChar *)srcP; 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return needNormalize; 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the CE retrieval functions */ 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t getImplicit(UChar32 cp, collIterate *collationSource); 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Querustatic uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource); 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there should be a macro version of this function in the header file */ 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the first function that tries to fetch a collation element */ 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* If it's not succesfull or it encounters a more difficult situation */ 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* some more sofisticated and slower functions are invoked */ 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) { 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t order = 0; 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collationSource->CEpos > collationSource->toReturn) { /* Are there any CEs from previous expansions? */ 1437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = *(collationSource->toReturn++); /* if so, return them */ 1438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(collationSource->CEpos == collationSource->toReturn) { 1439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->CEpos = collationSource->toReturn = collationSource->extendCEs ? 
collationSource->extendCEs : collationSource->CEs; 1440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return order; 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar ch = 0; 1445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->offsetReturn = NULL; 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) /* Loop handles case when incremental normalize switches */ 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { /* to or from the side buffer / original string, and we */ 1449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* need to start again to get the next character. */ 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The source string is null terminated and we're not working from the side buffer, 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and we're not normalizing. This is the fast path. 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.) 
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *collationSource->pos++; 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ch != 0) { 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_NO_MORE_CES; 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collationSource->flags & UCOL_ITER_HASLEN) { 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Normal path for strings when length is specified. 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (We can't be in side buffer because it is always null terminated.) 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collationSource->pos >= collationSource->endp) { 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Ran off of the end of the main source string. We're done. 
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_NO_MORE_CES; 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *collationSource->pos++; 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if(collationSource->flags & UCOL_USE_ITERATOR) { 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 iterCh = collationSource->iterator->next(collationSource->iterator); 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(iterCh == U_SENTINEL) { 1477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_NO_MORE_CES; 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (UChar)iterCh; 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Null terminated string. 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *collationSource->pos++; 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ch == 0) { 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Ran off end of buffer. 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Ran off end of main string. backing up one character. 
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->pos--; 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_NO_MORE_CES; 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Hit null in the normalize side buffer. 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Usually this means the end of the normalized data, 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // except for one odd case: a null followed by combining chars, 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which is the case if we are at the start of the buffer. 149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) { 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Null marked end of side buffer. 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Revert to the main string and 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // loop back to top to try again to get a character. 
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->pos = collationSource->fcdPosition; 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collationSource->flags = collationSource->origFlags; 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(collationSource->flags&UCOL_HIRAGANA_Q) { 1513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag 1514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * based on whether the previous codepoint was Hiragana or Katakana. 1515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) || 1517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) { 1518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->flags |= UCOL_WAS_HIRAGANA; 1519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->flags &= ~UCOL_WAS_HIRAGANA; 1521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We've got a character. See if there's any fcd and/or normalization stuff to do. 
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer. 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((collationSource->flags & UCOL_ITER_NORM) == 0) { 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collationSource->fcdPosition >= collationSource->pos) { 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // An earlier FCD check has already covered the current character. 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We can go ahead and process this char. 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ch < ZERO_CC_LIMIT_ ) { 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fast fcd safe path. Trailing combining class == 0. This char is OK. 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We need to peek at the next character in order to tell if we are FCD 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) { 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are at the last char of source string. 
1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // It is always OK for FCD check. 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Need a more complete FCD check and possible normalization. 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collIterFCD(collationSource)) { 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterNormalize(collationSource); 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No normalization was needed. Go ahead and process the char we already had. 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Some normalization happened. 
Next loop iteration will pick up a char 1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // from the normalization buffer. 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } // end for (;;) 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ch <= 0xFF) { 1572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* For latin-1 characters we never need to fall back to the UCA table */ 1573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* because all of the UCA data is replicated in the latinOneMapping array */ 1574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = coll->latinOneMapping[ch]; 1575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (order > UCOL_NOT_FOUND) { 1576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); 1577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Always use UCA for Han, Hangul 1582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // (Han extension A is before main Han block) 1583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // **** Han compatibility chars ?? 
**** 1584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && 1585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { 1586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { 1587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // between the two target ranges; do normal lookup 1588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // **** this range is YI, Modifier tone letters, **** 1589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // **** Latin-D, Syloti Nagari, Phagas-pa. **** 1590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // **** Latin-D might be tailored, so we need to **** 1591b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // **** do the normal lookup for these guys. **** 1592b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 1593b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1594b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // in one of the target ranges; use UCA 1595b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru order = UCOL_NOT_FOUND; 1596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1597b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1598b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 1599b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1600b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(order > UCOL_NOT_FOUND) { /* if a CE is special */ 1602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */ 
1603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1604b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */ 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */ 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); 1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */ 1610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status); 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(order == UCOL_NOT_FOUND) { 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru order = getImplicit(ch, collationSource); 1616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return order; /* return the CE */ 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_getNextCE, out-of-line version for use from other files. 
*/ 1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) { 1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ucol_IGetNextCE(coll, collationSource, status); 1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Incremental previous normalization happens here. Pick up the range of chars 1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* identifed by FCD, normalize it into the collIterate's writable buffer, 1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* switch the collIterate's state to use the writable buffer. 
1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid collPrevIterNormalize(collIterate *data) 1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pEnd = data->pos; /* End normalize + 1 */ 163850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pStart; 1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Start normalize */ 1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->fcdPosition == NULL) { 1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStart = data->string; 1643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStart = data->fcdPosition + 1; 1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t normLen = 164950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pStart) + 1)), 165050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer, 165150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status). 
165250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length(); 165350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(status)) { 165450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this puts the null termination infront of the normalized string instead 1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru of the end 1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 166050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer.insert(0, (UChar)0); 1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 1663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The usual case at this point is that we've got a base 1664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * character followed by marks that were normalized. If 1665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * fcdPosition is NULL, that means that we backed up to 1666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * the beginning of the string and there's no base character. 1667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Forward processing will usually normalize when it sees 1669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * the first mark, so that mark will get it's natural offset 1670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * and the rest will get the offset of the character following 1671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * the marks. The base character will also get its natural offset. 
1672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * We write the offset of the base character, if there is one, 1674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * followed by the offset of the first mark and then the offsets 1675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * of the rest of the marks. 1676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t firstMarkOffset = 0; 167850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t trailOffset = (int32_t)(data->pos - data->string + 1); 1679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t trailCount = normLen - 1; 1680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->fcdPosition != NULL) { 168250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t baseOffset = (int32_t)(data->fcdPosition - data->string); 1683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar baseChar = *data->fcdPosition; 1684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru firstMarkOffset = baseOffset + 1; 1686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 168850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the base character is the start of a contraction, forward processing 168950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * will normalize the marks while checking for the contraction, which means 169050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * that the offset of the first mark will the same as the other marks. 
169150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 169250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * **** THIS IS PROBABLY NOT A COMPLETE TEST **** 169350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 169450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (baseChar >= 0x100) { 169550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, baseChar); 169650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 169750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) { 169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, baseChar); 169950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION_TAG) { 170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstMarkOffset = trailOffset; 170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 170627f654740f2a26ad62a5c155af9199af9e69b889claireho data->appendOffset(baseOffset, status); 1707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 170927f654740f2a26ad62a5c155af9199af9e69b889claireho data->appendOffset(firstMarkOffset, status); 1710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (int32_t i = 0; i < trailCount; i += 1) { 171227f654740f2a26ad62a5c155af9199af9e69b889claireho data->appendOffset(trailOffset, status); 1713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
data->offsetRepeatValue = trailOffset; 1716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->offsetReturn = data->offsetStore - 1; 1718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->offsetReturn == data->offsetBuffer) { 1719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->offsetStore = data->offsetBuffer; 1720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 172250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.getTerminatedBuffer() + 1 + normLen; 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->origFlags = data->flags; 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags |= UCOL_ITER_INNORMBUF; 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Incremental FCD check for previous iteration and normalize. Called from 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* getPrevCE when normalization state is suspect. 1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* When entering, the state is known to be this: 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* o We are working in the main buffer of the collIterate, not the side 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* writable buffer. When in the side buffer, normalization mode is always 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* off, so we won't get here. 
1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* o The leading combining class from the current character is 0 or the 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* trailing combining class of the previous char was zero. 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* True because the previous call to this function will have always exited 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* that way, and we get called for every char where cc might be non-zero. 1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterate struct 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return normalization status, TRUE for normalization to be done, FALSE 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* otherwise 1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collPrevIterFCD(collIterate *data) 1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, *start; 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadingCC; 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t trailingCC = 0; 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t fcd; 1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool result = FALSE; 1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start = data->string; 1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src = data->pos + 1; 1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Get the trailing combining class of the current 
character. */ 1757b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src); 1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); 1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (leadingCC != 0) { 1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru The current char has a non-zero leading combining class. 1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Scan backward until we find a char with a trailing cc of zero. 1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) 1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start == src) { 1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition = NULL; 1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1773b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src); 1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); 1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (trailingCC == 0) { 1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (leadingCC < trailingCC) { 1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = TRUE; 1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition = (UChar *)src; 1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179427f654740f2a26ad62a5c155af9199af9e69b889claireho/** gets a code unit from the string at a given offset 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Handles both normal and iterative cases. 1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * No error checking - caller beware! 
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 179827f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline 179927f654740f2a26ad62a5c155af9199af9e69b889clairehoUChar peekCodeUnit(collIterate *source, int32_t offset) { 1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->pos != NULL) { 1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->pos + offset); 1802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(source->iterator != NULL) { 180327f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c; 1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(offset != 0) { 1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->iterator->move(source->iterator, offset, UITER_CURRENT); 180627f654740f2a26ad62a5c155af9199af9e69b889claireho c = source->iterator->next(source->iterator); 1807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->iterator->move(source->iterator, -offset-1, UITER_CURRENT); 1808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 180927f654740f2a26ad62a5c155af9199af9e69b889claireho c = source->iterator->current(source->iterator); 1810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 181127f654740f2a26ad62a5c155af9199af9e69b889claireho return c >= 0 ? (UChar)c : 0xfffd; // If the caller works properly, we should never see c<0. 1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 181327f654740f2a26ad62a5c155af9199af9e69b889claireho return 0xfffd; 1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 181727f654740f2a26ad62a5c155af9199af9e69b889claireho// Code point version. Treats the offset as a _code point_ delta. 
181827f654740f2a26ad62a5c155af9199af9e69b889claireho// We cannot use U16_FWD_1_UNSAFE and similar because we might not have well-formed UTF-16. 181927f654740f2a26ad62a5c155af9199af9e69b889claireho// We cannot use U16_FWD_1 and similar because we do not know the start and limit of the buffer. 182027f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline 182127f654740f2a26ad62a5c155af9199af9e69b889clairehoUChar32 peekCodePoint(collIterate *source, int32_t offset) { 182227f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c; 182327f654740f2a26ad62a5c155af9199af9e69b889claireho if(source->pos != NULL) { 182427f654740f2a26ad62a5c155af9199af9e69b889claireho const UChar *p = source->pos; 182527f654740f2a26ad62a5c155af9199af9e69b889claireho if(offset >= 0) { 182627f654740f2a26ad62a5c155af9199af9e69b889claireho // Skip forward over (offset-1) code points. 182727f654740f2a26ad62a5c155af9199af9e69b889claireho while(--offset >= 0) { 182827f654740f2a26ad62a5c155af9199af9e69b889claireho if(U16_IS_LEAD(*p++) && U16_IS_TRAIL(*p)) { 182927f654740f2a26ad62a5c155af9199af9e69b889claireho ++p; 183027f654740f2a26ad62a5c155af9199af9e69b889claireho } 183127f654740f2a26ad62a5c155af9199af9e69b889claireho } 183227f654740f2a26ad62a5c155af9199af9e69b889claireho // Read the code point there. 183327f654740f2a26ad62a5c155af9199af9e69b889claireho c = *p++; 183427f654740f2a26ad62a5c155af9199af9e69b889claireho UChar trail; 183527f654740f2a26ad62a5c155af9199af9e69b889claireho if(U16_IS_LEAD(c) && U16_IS_TRAIL(trail = *p)) { 183627f654740f2a26ad62a5c155af9199af9e69b889claireho c = U16_GET_SUPPLEMENTARY(c, trail); 183727f654740f2a26ad62a5c155af9199af9e69b889claireho } 183827f654740f2a26ad62a5c155af9199af9e69b889claireho } else /* offset<0 */ { 183927f654740f2a26ad62a5c155af9199af9e69b889claireho // Skip backward over (offset-1) code points. 
184027f654740f2a26ad62a5c155af9199af9e69b889claireho while(++offset < 0) { 184127f654740f2a26ad62a5c155af9199af9e69b889claireho if(U16_IS_TRAIL(*--p) && U16_IS_LEAD(*(p - 1))) { 184227f654740f2a26ad62a5c155af9199af9e69b889claireho --p; 184327f654740f2a26ad62a5c155af9199af9e69b889claireho } 184427f654740f2a26ad62a5c155af9199af9e69b889claireho } 184527f654740f2a26ad62a5c155af9199af9e69b889claireho // Read the code point before that. 184627f654740f2a26ad62a5c155af9199af9e69b889claireho c = *--p; 184727f654740f2a26ad62a5c155af9199af9e69b889claireho UChar lead; 184827f654740f2a26ad62a5c155af9199af9e69b889claireho if(U16_IS_TRAIL(c) && U16_IS_LEAD(lead = *(p - 1))) { 184927f654740f2a26ad62a5c155af9199af9e69b889claireho c = U16_GET_SUPPLEMENTARY(lead, c); 185027f654740f2a26ad62a5c155af9199af9e69b889claireho } 185127f654740f2a26ad62a5c155af9199af9e69b889claireho } 185227f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(source->iterator != NULL) { 185327f654740f2a26ad62a5c155af9199af9e69b889claireho if(offset >= 0) { 185427f654740f2a26ad62a5c155af9199af9e69b889claireho // Skip forward over (offset-1) code points. 185527f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t fwd = offset; 185627f654740f2a26ad62a5c155af9199af9e69b889claireho while(fwd-- > 0) { 185727f654740f2a26ad62a5c155af9199af9e69b889claireho uiter_next32(source->iterator); 185827f654740f2a26ad62a5c155af9199af9e69b889claireho } 185927f654740f2a26ad62a5c155af9199af9e69b889claireho // Read the code point there. 186027f654740f2a26ad62a5c155af9199af9e69b889claireho c = uiter_current32(source->iterator); 186127f654740f2a26ad62a5c155af9199af9e69b889claireho // Return to the starting point, skipping backward over (offset-1) code points. 
186227f654740f2a26ad62a5c155af9199af9e69b889claireho while(offset-- > 0) { 186327f654740f2a26ad62a5c155af9199af9e69b889claireho uiter_previous32(source->iterator); 186427f654740f2a26ad62a5c155af9199af9e69b889claireho } 186527f654740f2a26ad62a5c155af9199af9e69b889claireho } else /* offset<0 */ { 186627f654740f2a26ad62a5c155af9199af9e69b889claireho // Read backward, reading offset code points, remember only the last-read one. 186727f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t back = offset; 186827f654740f2a26ad62a5c155af9199af9e69b889claireho do { 186927f654740f2a26ad62a5c155af9199af9e69b889claireho c = uiter_previous32(source->iterator); 187027f654740f2a26ad62a5c155af9199af9e69b889claireho } while(++back < 0); 187127f654740f2a26ad62a5c155af9199af9e69b889claireho // Return to the starting position, skipping forward over offset code points. 187227f654740f2a26ad62a5c155af9199af9e69b889claireho do { 187327f654740f2a26ad62a5c155af9199af9e69b889claireho uiter_next32(source->iterator); 187427f654740f2a26ad62a5c155af9199af9e69b889claireho } while(++offset < 0); 187527f654740f2a26ad62a5c155af9199af9e69b889claireho } 187627f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 187727f654740f2a26ad62a5c155af9199af9e69b889claireho c = U_SENTINEL; 187827f654740f2a26ad62a5c155af9199af9e69b889claireho } 187927f654740f2a26ad62a5c155af9199af9e69b889claireho return c; 188027f654740f2a26ad62a5c155af9199af9e69b889claireho} 188127f654740f2a26ad62a5c155af9199af9e69b889claireho 1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Determines if we are at the start of the data string in the backwards 1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collation iterator 1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator 1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return TRUE if we are at the start 
1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool isAtStartPrevIterate(collIterate *data) { 1890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->pos == NULL && data->iterator != NULL) { 1891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return !data->iterator->hasPrevious(data->iterator); 1892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //return (collIter_bos(data)) || 1894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (data->pos == data->string) || 1895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ((data->flags & UCOL_ITER_INNORMBUF) && 1896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(data->pos - 1) == 0 && data->fcdPosition == NULL); 1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void goBackOne(collIterate *data) { 1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# if 0 1902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // somehow, it looks like we need to keep iterator synced up 1903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // at all times, as above. 
1904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->pos) { 1905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->pos--; 1906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->iterator) { 1908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->iterator->previous(data->iterator); 1909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) { 1912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->iterator->previous(data->iterator); 1913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(data->pos) { 1915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data->pos --; 1916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inline function that gets a simple CE. 1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* So what it does is that it will first check the expansion buffer. If the 1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* expansion buffer is not empty, ie the end pointer to the expansion buffer 1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* is different from the string pointer, we return the collation element at the 1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* return pointer and decrement it. 
* For special CEs (values above UCOL_NOT_FOUND) and for characters that can end
* a contraction it resorts to ucol_prv_getSpecialPrevCE; if no mapping is found
* at all it falls back to getPrevImplicit.
* @param coll collator data
* @param data collation iterator struct
* @param status error status
* @return the previous collation element, or UCOL_NO_MORE_CES when the start of
*         the text has been reached
*/
static
inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                               UErrorCode *status)
{
    uint32_t result = (uint32_t)UCOL_NULLORDER;

    /* Offset bookkeeping: consume one pending repeat, or step the offset
       return pointer back; when it is already at the start of the offset
       buffer, clear it and reset the store pointer. */
    if (data->offsetReturn != NULL) {
        if (data->offsetRepeatCount > 0) {
            data->offsetRepeatCount -= 1;
        } else {
            if (data->offsetReturn == data->offsetBuffer) {
                data->offsetReturn = NULL;
                data->offsetStore  = data->offsetBuffer;
            } else {
                data->offsetReturn -= 1;
            }
        }
    }

    /* CEs are still buffered: toReturn is above the start of whichever CE
       buffer is in use (extendCEs if set, otherwise CEs). */
    if ((data->extendCEs && data->toReturn > data->extendCEs) ||
            (!data->extendCEs && data->toReturn > data->CEs))
    {
        data->toReturn -= 1;
        result = *(data->toReturn);
        /* Reached the start of the buffer: reset the write position too. */
        if (data->CEs == data->toReturn || data->extendCEs == data->toReturn) {
            data->CEpos = data->toReturn;
        }
    }
    else {
        UChar ch = 0;

        /*
        Loop handles case when incremental normalize switches to or from the
        side buffer / original string, and we need to start again to get the
        next character.
        */
        for (;;) {
            if (data->flags & UCOL_ITER_HASLEN) {
                /*
                Normal path for strings when length is specified.
                Not in side buffer because it is always null terminated.
                */
                if (data->pos <= data->string) {
                    /* End of the main source string */
                    return UCOL_NO_MORE_CES;
                }
                data->pos --;
                ch = *data->pos;
            }
            // we are using an iterator to go back. Pray for us!
            else if (data->flags & UCOL_USE_ITERATOR) {
                UChar32 iterCh = data->iterator->previous(data->iterator);
                if(iterCh == U_SENTINEL) {
                    return UCOL_NO_MORE_CES;
                } else {
                    ch = (UChar)iterCh;
                }
            }
            else {
                data->pos --;
                ch = *data->pos;
                /* we are in the side buffer. */
                if (ch == 0) {
                    /*
                    At the start of the normalize side buffer.
                    Go back to string.
                    Because pointer points to the last accessed character,
                    hence we have to increment it by one here.
                    */
                    data->flags = data->origFlags;
                    data->offsetRepeatValue = 0;

                    if (data->fcdPosition == NULL) {
                        /* No saved position in the string: we are done. */
                        data->pos = data->string;
                        return UCOL_NO_MORE_CES;
                    }
                    else {
                        data->pos   = data->fcdPosition + 1;
                    }

                    /* Restart the loop to fetch a unit using the restored flags. */
                    continue;
                }
            }

            /* Track whether the unit just read lies in U+3040..U+309F. */
            if(data->flags&UCOL_HIRAGANA_Q) {
                if(ch>=0x3040 && ch<=0x309f) {
                    data->flags |= UCOL_WAS_HIRAGANA;
                } else {
                    data->flags &= ~UCOL_WAS_HIRAGANA;
                }
            }

            /*
            * got a character to determine if there's fcd and/or normalization
            * stuff to do.
            * if the current character is not fcd.
            * if current character is at the start of the string
            * Trailing combining class == 0.
            * Note if pos is in the writablebuffer, norm is always 0
            */
            if (ch < ZERO_CC_LIMIT_ ||
              // this should propel us out of the loop in the iterator case
                (data->flags & UCOL_ITER_NORM) == 0 ||
                (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
                || data->string == data->pos) {
                break;
            }

            if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
                /* if next character is FCD */
                if (data->pos == data->string) {
                    /* First char of string is always OK for FCD check */
                    break;
                }

                /* Not first char of string, do the FCD fast test */
                if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
                    break;
                }
            }

            /* Need a more complete FCD check and possible normalization. */
            if (collPrevIterFCD(data)) {
                collPrevIterNormalize(data);
            }

            if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
                /*  No normalization. Go ahead and process the char. */
                break;
            }

            /*
            Some normalization happened.
            Next loop picks up a char from the normalization buffer.
            */
        }

        /* attempt to handle contractions, after removal of the backwards
        contraction
        */
        if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
            result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
        } else {
            if (ch <= 0xFF) {
                /* Code units up to 0xFF are looked up in the latinOneMapping table. */
                result = coll->latinOneMapping[ch];
            }
            else {
                // Always use UCA for [3400..9FFF], [AC00..D7AF]
                // **** [FA0E..FA2F] ?? ****
                if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
                    (ch >= 0x3400 && ch <= 0xD7AF)) {
                    if (ch > 0x9FFF && ch < 0xAC00) {
                        // between the two target ranges; do normal lookup
                        // **** this range is YI, Modifier tone letters, ****
                        // **** Latin-D, Syloti Nagari, Phags-pa.        ****
                        // **** Latin-D might be tailored, so we need to ****
                        // **** do the normal lookup for these guys.     ****
                        result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
                    } else {
                        /* Inside a target range: skip this collator's mapping. */
                        result = UCOL_NOT_FOUND;
                    }
                } else {
                    result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
                }
            }
            /* Values above UCOL_NOT_FOUND are special CEs that need resolving. */
            if (result > UCOL_NOT_FOUND) {
                result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
            }
            if (result == UCOL_NOT_FOUND) { // Not found in master list
                /* NOTE(review): this check uses data->coll while the contraction
                   check above uses coll - presumably the same collator; confirm. */
                if (!isAtStartPrevIterate(data) &&
                    ucol_contractionEndCP(ch, data->coll))
                {
                    result = UCOL_CONTRACTION;
                } else {
                    if(coll->UCA) {
                        result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
                    }
                }

                /* Resolve a special CE against coll->UCA, when there is one. */
                if (result > UCOL_NOT_FOUND) {
                    if(coll->UCA) {
                        result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
                    }
                }
            }
        }

        /* Still nothing found: fall back to getPrevImplicit. */
        if(result == UCOL_NOT_FOUND) {
            result = getPrevImplicit(ch, data);
        }
    }

    return result;
}


/* ucol_getPrevCE, out-of-line version for use from other files.
*/ 2127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint32_t U_EXPORT2 2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getPrevCE(const UCollator *coll, collIterate *data, 2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ucol_IGetPrevCE(coll, data, status); 2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this should be connected to special Jamo handling */ 2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint32_t U_EXPORT2 2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) { 2137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate colIt; 213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, &u, 1, &colIt, status); 213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_IGetNextCE(coll, &colIt, status); 2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument character into the end of the buffer pushing back the 2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* null terminator. 
2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate struct data 2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch character to be appended 2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the position of the new addition 2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 215350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline const UChar * insertBufferEnd(collIterate *data, UChar ch) 2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 215550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldLength = data->writableBuffer.length(); 215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return data->writableBuffer.append(ch).getTerminatedBuffer() + oldLength; 2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument string into the end of the buffer pushing back the 2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* null terminator. 
2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate struct data 2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param string to be appended 2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param length of the string to be appended 2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the position of the new addition 2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 216850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline const UChar * insertBufferEnd(collIterate *data, const UChar *str, int32_t length) 2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 217050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldLength = data->writableBuffer.length(); 217150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return data->writableBuffer.append(str, length).getTerminatedBuffer() + oldLength; 2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Special normalization function for contraction in the forwards iterator. 2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This normalization sequence will place the current character at source->pos 2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and its following normalized sequence into the buffer. 2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The fcd position, pos will be changed. 2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* pos will now point to positions in the buffer. 2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Flags will be changed accordingly. 
2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data 2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizeNextContraction(collIterate *data) 2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 218650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t strsize; 2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* because the pointer points to the next character */ 218950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pStart = data->pos - 1; 219050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pEnd; 2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { 219350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer.setTo(*(pStart - 1)); 2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strsize = 1; 2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 219750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho strsize = data->writableBuffer.length(); 2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pEnd = data->fcdPosition; 2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 220250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer.append( 220350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), status)); 
220450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(status)) { 220550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 220850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.getTerminatedBuffer() + strsize; 2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->origFlags = data->flags; 2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags |= UCOL_ITER_INNORMBUF; 2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Contraction character management function that returns the next character 2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* for the forwards iterator. 2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Does nothing if the next character is in buffer and not the first character 2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* in it. 2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Else it checks next character in data string to see if it is normalizable. 2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* If it is not, the character is simply copied into the buffer, else 2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the whole normalized substring is copied into the buffer, including the 2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* current character. 
2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data 2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return next character 2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UChar getNextNormalizedChar(collIterate *data) 2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar nextch; 2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar ch; 2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Here we need to add the iterator code. One problem is the way 2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // end of string is handled. If we just return next char, it could 2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be the sentinel. Most of the cases already check for this, but we 2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // need to be sure. 2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ) { 2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if no normalization and not in buffer. 
*/ 2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->flags & UCOL_USE_ITERATOR) { 2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar)data->iterator->next(data->iterator); 2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (data->flags & UCOL_ITER_NORM && data->flags & UCOL_USE_ITERATOR) { 2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //normalizeIterator(data); 2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF); 2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((innormbuf && *data->pos != 0) || 2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (data->fcdPosition != NULL && !innormbuf && 2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos < data->fcdPosition)) { 2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if next character is in normalized buffer, no further normalization 2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru is required 2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if 
(data->flags & UCOL_ITER_HASLEN) { 2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* in data string */ 2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->pos + 1 == data->endp) { 2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (innormbuf) { 2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // inside the normalization buffer, but at the end 2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (since we encountered zero). This means, in the 2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // case we're using char iterator, that we need to 2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do another round of normalization. 2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if(data->origFlags & UCOL_USE_ITERATOR) { 2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we need to restore original flags, 2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // otherwise, we'll lose them 2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //data->flags = data->origFlags; 2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //normalizeIterator(data); 2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //return *(data->pos++); 2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} else { 2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in writable buffer, at this point fcdPosition can not be 2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pointing to the end of the data string. 
see contracting tag. 2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->fcdPosition) { 2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*(data->fcdPosition + 1) == 0 || 2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition + 1 == data->endp) { 2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* at the end of the string, dump it into the normalizer */ 228650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = insertBufferEnd(data, *(data->fcdPosition)) + 1; 2287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check if data->pos received a null pointer 2288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->pos == NULL) { 2289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (UChar)-1; // Return to indicate error. 2290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->fcdPosition ++); 2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = data->fcdPosition; 2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(data->origFlags & UCOL_USE_ITERATOR) { 2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we are here, we're using a normalizing iterator. 2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we should just continue further. 
2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags = data->origFlags; 2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = NULL; 2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar)data->iterator->next(data->iterator); 2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*(data->pos + 1) == 0) { 2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *data->pos ++; 2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextch = *data->pos; 2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the current character is not fcd. 2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Trailing combining class == 0. 
2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->fcdPosition == NULL || data->fcdPosition < data->pos) && 2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ || 2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) { 2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Need a more complete FCD check and possible normalization. 2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalize substring will be appended to buffer 2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collIterFCD(data)) { 2325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizeNextContraction(data); 2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (innormbuf) { 2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fcdposition shifted even when there's no normalization, if we 2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru don't input the rest into this, we'll get the wrong position when 2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru we reach the end of the writableBuffer */ 233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length = (int32_t)(data->fcdPosition - data->pos + 1); 233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = insertBufferEnd(data, data->pos - 1, length); 2334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check if data->pos received a null pointer 2335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->pos == NULL) { 
2336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (UChar)-1; // Return to indicate error. 2337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos ++); 2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (innormbuf) { 2343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru no normalization is to be done hence only one character will be 2345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appended to the buffer. 2346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 234750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = insertBufferEnd(data, ch) + 1; 2348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check if data->pos received a null pointer 2349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (data->pos == NULL) { 2350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (UChar)-1; // Return to indicate error. 
2351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* points back to the pos in string */ 2355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ch; 2356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Function to copy the buffer into writableBuffer and sets the fcd position to 2362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the correct position 2363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param source data string source 2364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param buffer character buffer 2365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 236750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &buffer) 2368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* okay confusing part here. to ensure that the skipped characters are 2370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru considered later, we need to place it in the appropriate position in the 2371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalization buffer and reassign the pos pointer. 
simple case if pos 2372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reside in string, simply copy to normalization buffer and 2373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fcdposition = pos, pos = start of normalization buffer. if pos in 2374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalization buffer, we'll insert the copy infront of pos and point pos 2375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru to the start of the normalization buffer. why am i doing these copies? 2376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru well, so that the whole chunk of codes in the getNextCE, ucol_prv_getSpecialCE does 2377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru not require any changes, which be really painful. */ 2378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (source->flags & UCOL_ITER_INNORMBUF) { 237950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t replaceLength = source->pos - source->writableBuffer.getBuffer(); 238050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->writableBuffer.replace(0, replaceLength, buffer); 2381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->fcdPosition = source->pos; 2384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->origFlags = source->flags; 2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->flags |= UCOL_ITER_INNORMBUF; 2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR); 238750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->writableBuffer = buffer; 2388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
source->pos = source->writableBuffer.getTerminatedBuffer(); 2391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Function to get the discontiguos collation element within the source. 2395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Note this function will set the position to the appropriate places. 2396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll current collator used 2397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param source data string source 2398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param constart index to the start character in the contraction table 2399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return discontiguos collation element offset 2400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t getDiscontiguous(const UCollator *coll, collIterate *source, 2403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *constart) 2404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* source->pos currently points to the second combining character after 2406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru the start character */ 240750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *temppos = source->pos; 240850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString buffer; 2409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *tempconstart = constart; 2410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tempflags = source->flags; 
2411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool multicontraction = FALSE; 2412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterateState discState; 2413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backupState(source, &discState); 2415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 241627f654740f2a26ad62a5c155af9199af9e69b889claireho buffer.setTo(peekCodePoint(source, -1)); 2417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 2418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *UCharOffset; 2419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar schar, 2420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tchar; 2421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t result; 2422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp) 242427f654740f2a26ad62a5c155af9199af9e69b889claireho || (peekCodeUnit(source, 0) == 0 && 2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //|| (*source->pos == 0 && 2426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((source->flags & UCOL_ITER_INNORMBUF) == 0 || 2427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->fcdPosition == NULL || 2428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->fcdPosition == source->endp || 2429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(source->fcdPosition) == 0 || 2430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getCombiningClass(*(source->fcdPosition)) == 0)) || 2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* end of string in null terminated string or stopped by a 2432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru null character, 
note fcd does not always point to a base 2433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru character after the discontiguos change */ 243427f654740f2a26ad62a5c155af9199af9e69b889claireho u_getCombiningClass(peekCodePoint(source, 0)) == 0) { 2435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //u_getCombiningClass(*(source->pos)) == 0) { 2436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //constart = (UChar *)coll->image + getContractOffset(CE); 2437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (multicontraction) { 2438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->pos = temppos - 1; 243950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho setDiscontiguosAttribute(source, buffer); 2440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(coll->contractionCEs + 2441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (tempconstart - coll->contractionIndex)); 2442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru constart = tempconstart; 2444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharOffset = (UChar *)(tempconstart + 1); /* skip the backward offset*/ 2448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru schar = getNextNormalizedChar(source); 2449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (schar > (tchar = *UCharOffset)) { 2451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharOffset++; 2452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru if (schar != tchar) { 2455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not the correct codepoint. we stuff the current codepoint into 2456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru the discontiguos buffer and try the next character */ 245750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.append(schar); 2458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_getCombiningClass(schar) == 246227f654740f2a26ad62a5c155af9199af9e69b889claireho u_getCombiningClass(peekCodePoint(source, -2))) { 246350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.append(schar); 2464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = *(coll->contractionCEs + 2467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 2468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (result == UCOL_NOT_FOUND) { 2471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (isContraction(result)) { 2473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* this is a multi-contraction*/ 2474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempconstart = (UChar *)coll->image + getContractOffset(result); 2475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*(coll->contractionCEs + (constart - coll->contractionIndex)) 2476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
!= UCOL_NOT_FOUND) { 2477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru multicontraction = TRUE; 2478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temppos = source->pos + 1; 2479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 248150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho setDiscontiguosAttribute(source, buffer); 2482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 2483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no problems simply reverting just like that, 2487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if we are in string before getting into this function, points back to 2488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string hence no problem. 2489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if we are in normalization buffer before getting into this function, 2490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru since we'll never use another normalization within this function, we 2491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru know that fcdposition points to a base character. the normalization buffer 2492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru never change, hence this revert works. 
*/ 2493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loadState(source, &discState, TRUE); 2494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goBackOne(source); 2495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //source->pos = temppos - 1; 2497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source->flags = tempflags; 2498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(coll->contractionCEs + (constart - coll->contractionIndex)); 2499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* now uses Mark's getImplicitPrimary code */ 2502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) { 2504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t r = uprv_uca_getImplicitPrimary(cp); 2505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) | 0x000000C0; 2506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collationSource->offsetRepeatCount += 1; 2507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (r & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order' 2508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument character into the front of the buffer replacing the 2512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* front null terminator. 
2513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data 2514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch character to be appended 2515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 251750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline void insertBufferFront(collIterate *data, UChar ch) 2518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = data->writableBuffer.setCharAt(0, ch).insert(0, (UChar)0).getTerminatedBuffer() + 2; 2520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Special normalization function for contraction in the previous iterator. 2524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This normalization sequence will place the current character at source->pos 2525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and its following normalized sequence into the buffer. 2526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The fcd position, pos will be changed. 2527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* pos will now point to positions in the buffer. 2528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Flags will be changed accordingly. 
2529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data 2530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizePrevContraction(collIterate *data, UErrorCode *status) 2533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pEnd = data->pos + 1; /* End normalize + 1 */ 253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pStart; 2536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString endOfBuffer; 2538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->flags & UCOL_ITER_HASLEN) { 2539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalization buffer not used yet, we'll pull down the next 2541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru character into the end of the buffer 2542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 254350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho endOfBuffer.setTo(*pEnd); 2544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho endOfBuffer.setTo(data->writableBuffer, 1); // after the leading NUL 2547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->fcdPosition == NULL) { 2550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStart = data->string; 2551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStart = data->fcdPosition + 1; 2554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 255550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t normLen = 255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), 255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer, 255850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status). 255950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length(); 256050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 256150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 2562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this puts the null termination infront of the normalized string instead 2565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru of the end 2566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 256750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->pos = 256850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminatedBuffer() + 256950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1 + normLen; 2570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->origFlags = data->flags; 2571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags |= UCOL_ITER_INNORMBUF; 2572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 2573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 
2576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Contraction character management function that returns the previous character 2577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* for the backwards iterator. 2578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Does nothing if the previous character is in buffer and not the first 2579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* character in it. 2580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Else it checks previous character in data string to see if it is 2581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* normalizable. 2582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* If it is not, the character is simply copied into the buffer, else 2583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the whole normalized substring is copied into the buffer, including the 2584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* current character. 
2585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data 2586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return previous character 2587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 2589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status) 2590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar prevch; 2592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar ch; 259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *start; 2594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF); 2595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 || 2596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (innormbuf && *(data->pos - 1) != 0)) { 2597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if no normalization. 
2599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if previous character is in normalized buffer, no further normalization 2600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru is required 2601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data->flags & UCOL_USE_ITERATOR) { 2603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->iterator->move(data->iterator, -1, UITER_CURRENT); 2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar)data->iterator->next(data->iterator); 2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos - 1); 2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start = data->pos; 2611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((data->fcdPosition==NULL)||(data->flags & UCOL_ITER_HASLEN)) { 2612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* in data string */ 2613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((start - 1) == data->string) { 2614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(start - 1); 2615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start --; 2617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *start; 2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevch = *(start - 1); 2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 
2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in writable buffer, at this point fcdPosition can not be NULL. 2623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru see contracting tag. 2624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->fcdPosition == data->string) { 2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* at the start of the string, just dump it into the normalizer */ 262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho insertBufferFront(data, *(data->fcdPosition)); 2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition = NULL; 2629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos - 1); 2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start = data->fcdPosition; 2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *start; 2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevch = *(start - 1); 2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the current character is not fcd. 2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Trailing combining class == 0. 
2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data->fcdPosition > start && 2640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (ch >= NFC_ZERO_CC_BLOCK_LIMIT_ || prevch >= NFC_ZERO_CC_BLOCK_LIMIT_)) 2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Need a more complete FCD check and possible normalization. 2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalize substring will be appended to buffer 2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 264650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *backuppos = data->pos; 2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = start; 2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (collPrevIterFCD(data)) { 2649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizePrevContraction(data, status); 2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *(data->pos - 1); 2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->pos = backuppos; 2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition ++; 2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (innormbuf) { 2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru no normalization is to be done hence only one character will be 2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appended to the buffer. 
2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 266150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho insertBufferFront(data, ch); 2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data->fcdPosition --; 2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ch; 2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This function handles the special CEs like contractions, expansions, surrogates, Thai */ 2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* It is called by getNextCE */ 2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2671b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* The following should be even */ 2672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define UCOL_MAX_DIGITS_FOR_NUMBER 254 2673b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, collIterate *source, UErrorCode *status) { 2675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState entryState; 2676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &entryState); 2677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 cp = ch; 2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 2680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This loop will repeat only in the case of contractions, and only when a contraction 2681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // is found and the first CE resulting from 
that contraction is itself a special 2682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (an expansion, for example.) All other special CE types are fully handled the 2683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // first time through, and the loop exits. 2684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const uint32_t *CEOffset = NULL; 2686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch(getCETag(CE)) { 2687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case NOT_FOUND_TAG: 2688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This one is not found, and we'll let somebody else bother about it... no more games */ 2689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 2690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SPEC_PROC_TAG: 2691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Special processing is getting a CE that is preceded by a certain prefix 2693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Currently this is only needed for optimizing Japanese length and iteration marks. 2694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // When we encouter a special processing tag, we go backwards and try to see if 2695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we have a match. 2696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Contraction tables are used - so the whole process is not unlike contraction. 2697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // prefix data is stored backwards in the table. 
2698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset; 2699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar, tchar; 2700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState prefixState; 2701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &prefixState); 2702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &entryState, TRUE); 2703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); // We want to look at the point where we entered - actually one 2704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // before that... 2705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 2707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This loop will run once per source string character, for as long as we 2708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are matching a potential contraction sequence 2709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // First we position ourselves at the begining of contraction sequence 2711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE); 2712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collIter_bos(source)) { 2713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); 2714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = getPrevNormalizedChar(source, status); 2717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 
2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ 2720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset++; 2721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (schar == tchar) { 2724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Found the source string char in the table. 2725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pick up the corresponding CE from the table. 2726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 2728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 2730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Source string char was not in the table. 2732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have not found the prefix. 
2733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (ContractionStart - coll->contractionIndex)); 2735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isPrefix(CE)) { 2738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The source string char was in the contraction table, and the corresponding 2739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE is not a prefix CE. We found the prefix, break 2740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // out of loop, this CE will end up being returned. This is the normal 2741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // way out of prefix handling when the source actually contained 2742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the prefix. 
2743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != UCOL_NOT_FOUND) { // we found something and we can merilly continue 2747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &prefixState, TRUE); 2748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->origFlags & UCOL_USE_ITERATOR) { 2749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags = source->origFlags; 2750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // prefix search was a failure, we have to backup all the way to the start 2752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &entryState, TRUE); 2753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CONTRACTION_TAG: 2757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This should handle contractions */ 2759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState state; 2760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &state); 2761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t firstCE = *(coll->contractionCEs + ((UChar *)coll->image+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND; 2762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset; 2763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar, tchar; 
2764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 2766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This loop will run once per source string character, for as long as we */ 2767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* are matching a potential contraction sequence */ 2768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* First we position ourselves at the begining of contraction sequence */ 2770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE); 2771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collIter_eos(source)) { 2773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Ran off the end of the source string. 2774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); 2775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // So we'll pick whatever we have at the point... 2776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (CE == UCOL_NOT_FOUND) { 2777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // back up the source over all the chars we scanned going into this contraction. 
2778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = firstCE; 2779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, TRUE); 2780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->origFlags & UCOL_USE_ITERATOR) { 2781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags = source->origFlags; 2782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t maxCC = (uint8_t)(*(UCharOffset)&0xFF); /*get the discontiguos stuff */ /* skip the backward offset, see above */ 2788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8); 2789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = getNextNormalizedChar(source); 2791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ 2792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset++; 2793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (schar == tchar) { 2796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Found the source string char in the contraction table. 2797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pick up the corresponding CE from the table. 
2798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 2800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 2802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Source string char was not in contraction table. 2804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Unless we have a discontiguous contraction, we have finished 2805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // with this contraction. 2806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // in order to do the proper detection, we 2807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // need to see if we're dealing with a supplementary 2808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We test whether the next two char are surrogate pairs. 2809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * This test is done if the iterator is not NULL. 2810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * If there is no surrogate pair, the iterator 2811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * goes back one if needed. 
*/ 2812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 miss = schar; 2813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->iterator) { 2814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 surrNextChar; /* the next char in the iteration to test */ 2815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t prevPos; /* holds the previous position before move forward of the source iterator */ 2816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U16_IS_LEAD(schar) && source->iterator->hasNext(source->iterator)) { 2817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prevPos = source->iterator->index; 2818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru surrNextChar = getNextNormalizedChar(source); 2819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_TRAIL(surrNextChar)) { 2820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru miss = U16_GET_SUPPLEMENTARY(schar, surrNextChar); 2821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (prevPos < source->iterator->index){ 2822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (U16_IS_LEAD(schar)) { 2826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source)); 2827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t sCC; 2830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (miss < 0x300 || 2831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxCC == 0 || 
2832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (sCC = i_getCombiningClass(miss, coll)) == 0 || 2833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCC>maxCC || 2834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (allSame != 0 && sCC == maxCC) || 2835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIter_eos(source)) 2836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Contraction can not be discontiguous. 2838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); // back up the source string by one, 2839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // because the character we just looked at was 2840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // not part of the contraction. */ 2841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_IS_SUPPLEMENTARY(miss)) { 2842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (ContractionStart - coll->contractionIndex)); 2846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 2847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Contraction is possibly discontiguous. 
2849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Scan more of source string looking for a match 2850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar tempchar; 2852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* find the next character if schar is not a base character 2853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru and we are not yet at the end of the string */ 2854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempchar = getNextNormalizedChar(source); 2855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // probably need another supplementary thingie here 2856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (i_getCombiningClass(tempchar, coll) == 0) { 2858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_IS_SUPPLEMENTARY(miss)) { 2860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Spit out the last char of the string, wasn't tasty enough */ 2863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 2864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (ContractionStart - coll->contractionIndex)); 2865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 2866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = getDiscontiguous(coll, source, ContractionStart); 2867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } // else after if(schar == tchar) 
2870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE == UCOL_NOT_FOUND) { 2872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* The Source string did not match the contraction that we were checking. */ 2873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Back up the source position to undo the effects of having partially */ 2874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* scanned through what ultimately proved to not be a contraction. */ 2875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, TRUE); 2876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = firstCE; 2877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContraction(CE)) { 2881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The source string char was in the contraction table, and the corresponding 2882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE is not a contraction CE. We completed the contraction, break 2883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // out of loop, this CE will end up being returned. This is the normal 2884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // way out of contraction handling when the source actually contained 2885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the contraction. 
2886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The source string char was in the contraction table, and the corresponding 2891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE is IS a contraction CE. We will continue looping to check the source 2892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // string for the remaining chars in the contraction. 2893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex)); 2894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tempCE != UCOL_NOT_FOUND) { 2895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have scanned a a section of source string for which there is a 2896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE from the contraction table. Remember the CE and scan position, so 2897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that we can return to this point if further scanning fails to 2898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // match a longer contraction sequence. 
2899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru firstCE = tempCE; 2900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 2902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &state); 2903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru getNextNormalizedChar(source); 2904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Another way to do this is: 2906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //collIterateState tempState; 2907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //backupState(source, &tempState); 2908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //goBackOne(source); 2909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //backupState(source, &state); 2910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //loadState(source, &tempState, TRUE); 2911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The problem is that for incomplete contractions we have to remember the previous 2913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // position. Before, the only thing I needed to do was state.pos--; 2914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // After iterator introduction and especially after introduction of normalizing 2915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // iterators, it became much more difficult to decrease the saved state. 2916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // I'm not yet sure which of the two methods above is faster. 
2917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } // for(;;) 2919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } // case CONTRACTION_TAG: 2921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case LONG_PRIMARY_TAG: 2922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER; 2924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON; 2925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetRepeatCount += 1; 2926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 2927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case EXPANSION_TAG: 2929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 2930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This should handle expansion. */ 2931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* NOTE: we can encounter both continuations and expansions in an expansion! 
*/ 2932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* I have to decide where continuations are going to be dealt with */ 2933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t size; 2934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i; /* general counter */ 2935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */ 2937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru size = getExpansionCount(CE); 2938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *CEOffset++; 293927f654740f2a26ad62a5c155af9199af9e69b889claireho //source->offsetRepeatCount = -1; 2940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */ 2942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 1; i<size; i++) { 2943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = *CEOffset++; 294427f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetRepeatCount += 1; 2945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* else, we do */ 2947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(*CEOffset != 0) { 2948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = *CEOffset++; 294927f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetRepeatCount += 1; 2950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 
2954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case DIGIT_TAG: 2956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 2957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 2958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We do a check to see if we want to collate digits as numbers; if so we generate 2959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru a custom collation key. Otherwise we pull out the value stored in the expansion table. 2960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 2961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //uint32_t size; 2962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i; /* general counter */ 2963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->coll->numericCollation == UCOL_ON){ 2965b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collIterateState digitState = {0,0,0,0,0,0,0,0,0}; 2966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 char32 = 0; 2967b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t digVal = 0; 2968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t digIndx = 0; 2970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t endIndex = 0; 2971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t trailingZeroIndex = 0; 2972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t collateVal = 0; 2974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool nonZeroValReached = FALSE; 2976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
2977b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3]; // I just need a temporary place to store my generated CEs. 2978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 2979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We parse the source string until we hit a char that's NOT a digit. 2980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Use this u_charDigitValue. This might be slow because we have to 2981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru handle surrogates... 2982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 2983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 2984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_LEAD(ch)){ 2985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!collIter_eos(source)) { 2986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &digitState); 2987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar trail = getNextNormalizedChar(source); 2988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 2989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = U16_GET_SUPPLEMENTARY(ch, trail); 2990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 2991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &digitState, TRUE); 2992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 2993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 2995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 2996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char32 = ch; 
2999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digVal = u_charDigitValue(char32); 3001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digVal = u_charDigitValue(cp); // if we have arrived here, we have 3003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // already processed possible supplementaries that trigered the digit tag - 3004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // all supplementaries are marked in the UCA. 3005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We pad a zero in front of the first element anyways. This takes 3007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru care of the (probably) most common case where people are sorting things followed 3008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru by a single digit 3009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx++; 3011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;){ 3012b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Make sure we have enough space. No longer needed; 3013b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // at this point digIndx now has a max value of UCOL_MAX_DIGITS_FOR_NUMBER 3014b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // (it has been pre-incremented) so we just ensure that numTempBuf is big enough 3015b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3). 3016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Skipping over leading zeroes. 
3018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (digVal != 0) { 3019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru nonZeroValReached = TRUE; 3020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (nonZeroValReached) { 3022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We parse the digit string into base 100 numbers (this fits into a byte). 3024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We only add to the buffer in twos, thus if we are parsing an odd character, 3025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru that serves as the 'tens' digit while the if we are parsing an even one, that 3026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into 3027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid 3028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less 3029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru than all the other bytes. 3030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (digIndx % 2 == 1){ 3033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collateVal += (uint8_t)digVal; 3034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We don't enter the low-order-digit case unless we've already seen 3036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the high order, or for the first digit, which is always non-zero. 
3037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collateVal != 0) 3038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroIndex = 0; 3039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; 3041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collateVal = 0; 3042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else{ 3044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We drop the collation value into the buffer so if we need to do 3045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a "front patch" we don't have to check to see if we're hitting the 3046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // last element. 3047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collateVal = (uint8_t)(digVal * 10); 3048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for trailing zeroes. 
3050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collateVal == 0) 3051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!trailingZeroIndex) 3053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroIndex = (digIndx/2) + 2; 3054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 3056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroIndex = 0; 3057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; 3059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx++; 3061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Get next character. 
3064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!collIter_eos(source)){ 3065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ch = getNextNormalizedChar(source); 3066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_LEAD(ch)){ 3067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!collIter_eos(source)) { 3068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &digitState); 3069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar trail = getNextNormalizedChar(source); 3070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 3071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = U16_GET_SUPPLEMENTARY(ch, trail); 3072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &digitState, TRUE); 3074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3081b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((digVal = u_charDigitValue(char32)) == -1 || digIndx > UCOL_MAX_DIGITS_FOR_NUMBER){ 3082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Resetting position to point to the next unprocessed char. We 3083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // overshot it when doing our test/set for numbers. 3084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (char32 > 0xFFFF) { // For surrogates. 
3085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &digitState, TRUE); 3086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //goBackOne(source); 3087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 3089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (nonZeroValReached == FALSE){ 3097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx = 2; 3098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[2] = 6; 3099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ; 3102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (digIndx % 2 != 0){ 3103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We missed a value. Since digIndx isn't even, stuck too many values into the buffer (this is what 3105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward. 
3106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a 3107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru single pass and optimizes for strings with single digits. I'm just assuming that's the more common case. 3108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 2; i < endIndex; i++){ 3111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[i] = (((((numTempBuf[i] - 6)/2) % 10) * 10) + 3112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6; 3113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru --digIndx; 3115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Subtract one off of the last byte. 3118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[endIndex-1] -= 1; 3119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We want to skip over the first two slots in the buffer. The first slot 3122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the 3123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sign/exponent byte: 0x80 + (decimalPos/2) & 7f. 
3124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[0] = UCOL_CODAN_PLACEHOLDER; 3126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F)); 3127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Now transfer the collation key to our collIterate struct. 3129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The total size for our collation key is endIndx bumped up to the next largest even value divided by two. 3130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //size = ((endIndex+1) & ~1)/2; 3131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight 3132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight 3133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_BYTE_COMMON; // Tertiary weight. 3134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = 2; // Reset the index into the buffer. 
3135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i < endIndex) 3136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t primWeight = numTempBuf[i++] << 8; 3138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( i < endIndex) 3139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primWeight |= numTempBuf[i++]; 3140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER; 3141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // no numeric mode, we'll just switch to whatever we stashed and continue 3145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */ 3146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *CEOffset++; 3147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 3150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* various implicits optimization */ 3152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case IMPLICIT_TAG: /* everything that is not defined otherwise */ 3153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* UCA is filled with these. 
Tailorings are NOT_FOUND */ 3154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getImplicit(cp, source); 3155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/ 3156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImplicit 3157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getImplicit(cp, source); 3158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/ 3159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t 3161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7; 3162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t LCount = 19; 3163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t VCount = 21; 3164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t TCount = 28; 3165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t NCount = VCount * TCount; // 588 3166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t SCount = LCount * NCount; // 11172 3167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t L = ch - SBase; 3168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // divide into pieces 3170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t T = L % TCount; // we do it in this order since some compilers can do % and / in one operation 3172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L /= TCount; 
3173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t V = L % VCount; 3174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L /= VCount; 3175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // offset them 3177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L += LBase; 3179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru V += VBase; 3180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru T += TBase; 3181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // return the first CE, but first put the rest into the expansion buffer 3183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!source->coll->image->jamoSpecial) { // FAST PATH 3184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V); 3186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (T != TBase) { 3187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T); 3188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UTRIE_GET32_FROM_LEAD(&coll->mapping, L); 3191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // Jamo is Special 3193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Since Hanguls pass the FCD check, it is 3194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // guaranteed that we won't be in 3195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the 
normalization buffer if something like this happens 3196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // However, if we are using a uchar iterator and normalization 3197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // is ON, the Hangul that lead us here is going to be in that 3198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // normalization buffer. Here we want to restore the uchar 3199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // iterator state and pull out of the normalization buffer 3200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) { 3201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags = source->origFlags; // restore the iterator 3202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->pos = NULL; 3203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Move Jamos into normalization buffer 320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *buffer = source->writableBuffer.getBuffer(4); 320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t bufferLength; 320750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer[0] = (UChar)L; 320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer[1] = (UChar)V; 3209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (T != TBase) { 321050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer[2] = (UChar)T; 321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bufferLength = 3; 3212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bufferLength = 2; 3214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->writableBuffer.releaseBuffer(bufferLength); 3216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
3217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->fcdPosition = source->pos; // Indicate where to continue in main input string 3218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // after exhausting the writableBuffer 321950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->pos = source->writableBuffer.getTerminatedBuffer(); 3220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->origFlags = source->flags; 3221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags |= UCOL_ITER_INNORMBUF; 3222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 3223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(UCOL_IGNORABLE); 3225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SURROGATE_TAG: 3228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we encountered a leading surrogate. We shall get the CE by using the following code unit */ 3229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* two things can happen here: next code point can be a trailing surrogate - we will use it */ 3230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* to retrieve the CE, or it is not a trailing surrogate (or the string is done). In that case */ 323127f654740f2a26ad62a5c155af9199af9e69b889claireho /* we treat it like an unassigned code point. 
*/ 3232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar trail; 3234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState state; 3235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &state); 3236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) { 3237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we chould have stepped one char forward and it might have turned that it 3238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // was not a trail surrogate. In that case, we have to backup. 3239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, TRUE); 324027f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 3241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* TODO: CE contain the data from the previous CE + the mask. It should at least be unmasked */ 3243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFFFF, trail); 3244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates in this block, but not this one. 
3245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We need to backup 3246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, TRUE); 3247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 3248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // calculate the supplementary code point value, if surrogate was not tailored 3250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000)); 3251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case LEAD_SURROGATE_TAG: /* D800-DBFF*/ 3255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar nextChar; 3256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if( source->flags & UCOL_USE_ITERATOR) { 3257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source->iterator))) { 3258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cp = U16_GET_SUPPLEMENTARY(ch, nextChar); 3259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->iterator->next(source->iterator); 3260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getImplicit(cp, source); 3261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) || (source->pos<source->endp)) && 326327f654740f2a26ad62a5c155af9199af9e69b889claireho U_IS_TRAIL((nextChar=*source->pos))) { 326427f654740f2a26ad62a5c155af9199af9e69b889claireho cp = U16_GET_SUPPLEMENTARY(ch, nextChar); 
326527f654740f2a26ad62a5c155af9199af9e69b889claireho source->pos++; 326627f654740f2a26ad62a5c155af9199af9e69b889claireho return getImplicit(cp, source); 3267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 326827f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 3269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/ 327027f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; /* broken surrogate sequence */ 3271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CHARSET_TAG: 3272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* not yet implemented */ 3273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* probably after 1.8 */ 3274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_NOT_FOUND; 3275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: 3276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INTERNAL_PROGRAM_ERROR; 3277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE=0; 3278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (CE <= UCOL_NOT_FOUND) break; 3281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CE; 3283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Builds the pair of collation elements for code point `cp` from the weight
 * returned by uprv_uca_getImplicitPrimary(cp) (Mark's getImplicitPrimary code).
 *
 * - The bits of that weight selected by UCOL_PRIMARYMASK, OR'ed with
 *   0x00000505, are written into the CE buffer at collationSource->CEpos;
 *   CEpos is then advanced and toReturn is set to the advanced CEpos.
 * - The low 16 bits of the weight, shifted into the upper half-word and OR'ed
 *   with 0x000000C0, are the return value.
 *   NOTE(review): 0x0505 / 0xC0 look like the common secondary/tertiary bytes
 *   and the continuation marker -- confirm against ucol_imp.h.
 *
 * Offset bookkeeping depends on UCOL_ITER_INNORMBUF in collationSource->flags:
 * when it is set only offsetRepeatCount is set to 1; otherwise two offsets
 * (pos - string, and that value + 1) are appended and offsetReturn /
 * offsetStore / offsetBuffer[0] are updated as shown below.
 *
 * @param cp               code point handed to uprv_uca_getImplicitPrimary()
 * @param collationSource  iteration state whose CE and offset buffers are updated
 * @return                 the CE built from the low 16 bits of the implicit weight
 */
static
inline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {
    const uint32_t implicitWeight = uprv_uca_getImplicitPrimary(cp);

    // Queue the CE made from the masked weight, then step past it.
    *collationSource->CEpos = (implicitWeight & UCOL_PRIMARYMASK) | 0x00000505;
    collationSource->CEpos++;
    collationSource->toReturn = collationSource->CEpos;

    // **** doesn't work if using iterator ****
    if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
        // Offset of the current position relative to the start of the string.
        const int32_t startOffset = (int32_t)(collationSource->pos - collationSource->string);

        // NOTE(review): this status is local and never inspected, so a failure
        // reported by appendOffset() is not visible to the caller.
        UErrorCode appendStatus = U_ZERO_ERROR;
        collationSource->appendOffset(startOffset, appendStatus);
        collationSource->appendOffset(startOffset + 1, appendStatus);

        collationSource->offsetReturn = collationSource->offsetStore - 1;
        *collationSource->offsetBuffer = startOffset;
        if (collationSource->offsetReturn == collationSource->offsetBuffer) {
            collationSource->offsetStore = collationSource->offsetBuffer;
        }
    } else {
        collationSource->offsetRepeatCount = 1;
    }

    return ((implicitWeight & 0x0000FFFF) << 16) | 0x000000C0;
}
 3313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 3315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function handles the special CEs like contractions, expansions, 3316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * surrogates, Thai. 3317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is called by both getPrevCE 3318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 3319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, 3320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate *source, 3321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 3322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const uint32_t *CEOffset = NULL; 3324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *UCharOffset = NULL; 3325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar; 3326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *constart = NULL; 3327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t size; 3328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar buffer[UCOL_MAX_BUFFER]; 3329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *endCEBuffer; 3330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *strbuffer; 3331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t noChars = 0; 3332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t CECount =
0; 3333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) 3335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 3336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* the only ces that loops are thai and contractions */ 3337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch (getCETag(CE)) 3338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case NOT_FOUND_TAG: /* this tag always returns */ 3340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SPEC_PROC_TAG: 3343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Special processing is getting a CE that is preceded by a certain prefix 3345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Currently this is only needed for optimizing Japanese length and iteration marks. 3346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // When we encouter a special processing tag, we go backwards and try to see if 3347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we have a match. 3348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Contraction tables are used - so the whole process is not unlike contraction. 3349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // prefix data is stored backwards in the table. 
3350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset; 3351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar, tchar; 3352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterateState prefixState; 3353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backupState(source, &prefixState); 3354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 3355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This loop will run once per source string character, for as long as we 3356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are matching a potential contraction sequence 3357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // First we position ourselves at the begining of contraction sequence 3359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE); 3360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (collIter_bos(source)) { 3362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); 3363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = getPrevNormalizedChar(source, status); 3366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 3367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ 3369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
UCharOffset++; 3370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (schar == tchar) { 3373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Found the source string char in the table. 3374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pick up the corresponding CE from the table. 3375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 3376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 3377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 3379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if there is a completely ignorable code point in the middle of 3381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a prefix, we need to act as if it's not there 3382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero) 3383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // lone surrogates cannot be set to zero as it would break other processing 3384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar); 3385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // it's easy for BMP code points 3386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isZeroCE == 0) { 3387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 338827f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(U16_IS_SURROGATE(schar)) { 3389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // for supplementary code points, we have to check the next one 
3390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // situations where we are going to ignore 3391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1. beginning of the string: schar is a lone surrogate 3392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2. schar is a lone surrogate 3393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 3. schar is a trail surrogate in a valid surrogate sequence 3394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that is explicitly set to zero. 3395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!collIter_bos(source)) { 3396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar lead; 339727f654740f2a26ad62a5c155af9199af9e69b889claireho if(!U16_IS_SURROGATE_LEAD(schar) && U16_IS_LEAD(lead = getPrevNormalizedChar(source, status))) { 3398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, lead); 339927f654740f2a26ad62a5c155af9199af9e69b889claireho if(isSpecial(isZeroCE) && getCETag(isZeroCE) == SURROGATE_TAG) { 3400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar); 3401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(finalCE == 0) { 3402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // this is a real, assigned completely ignorable code point 3403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 3404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 3405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 340827f654740f2a26ad62a5c155af9199af9e69b889claireho // lone surrogate, treat like unassigned 340927f654740f2a26ad62a5c155af9199af9e69b889claireho return 
UCOL_NOT_FOUND; 3410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 341227f654740f2a26ad62a5c155af9199af9e69b889claireho // lone surrogate at the beggining, treat like unassigned 341327f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 3414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Source string char was not in the table. 3417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have not found the prefix. 3418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 3419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (ContractionStart - coll->contractionIndex)); 3420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isPrefix(CE)) { 3423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The source string char was in the contraction table, and the corresponding 3424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // CE is not a prefix CE. We found the prefix, break 3425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // out of loop, this CE will end up being returned. This is the normal 3426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // way out of prefix handling when the source actually contained 3427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the prefix. 
3428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &prefixState, TRUE); 3432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 343550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case CONTRACTION_TAG: { 3436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* to ensure that the backwards and forwards iteration matches, we 3437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru take the current region of most possible match and pass it through 3438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru the forward iteration. this will ensure that the obstinate problem of 3439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru overlapping contractions will not occur. 
3440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 344127f654740f2a26ad62a5c155af9199af9e69b889claireho schar = peekCodeUnit(source, 0); 3442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru constart = (UChar *)coll->image + getContractOffset(CE); 3443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isAtStartPrevIterate(source) 3444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* commented away contraction end checks after adding the checks 3445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru in getPrevCE */) { 3446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* start of string or this is not the end of any contraction */ 3447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 3448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (constart - coll->contractionIndex)); 3449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strbuffer = buffer; 3452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1); 3453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(UCharOffset --) = 0; 3454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru noChars = 0; 3455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // have to swap thai characters 3456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (ucol_unsafeCP(schar, coll)) { 3457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(UCharOffset) = schar; 3458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru noChars++; 3459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset --; 3460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = getPrevNormalizedChar(source, status); 
3461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goBackOne(source); 3462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: when we exhaust the contraction buffer, 3463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // it needs to get reallocated. The problem is 3464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that the size depends on the string which is 3465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // not iterated over. However, since we're travelling 3466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // backwards, we already had to set the iterator at 3467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the end - so we might as well know where we are? 3468c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (UCharOffset + 1 == buffer) { 3469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we have exhausted the buffer */ 3470c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t newsize = 0; 3471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->pos) { // actually dealing with a position 347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newsize = (int32_t)(source->pos - source->string + 1); 3473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // iterator 3474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newsize = 4 * UCOL_MAX_BUFFER; 3475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strbuffer = (UChar *)uprv_malloc(sizeof(UChar) * 3477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (newsize + UCOL_MAX_BUFFER)); 3478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* test for NULL */ 3479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strbuffer == NULL) { 3480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = 
U_MEMORY_ALLOCATION_ERROR; 3481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_NO_MORE_CES; 3482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset = strbuffer + newsize; 3484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(UCharOffset, buffer, 3485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_MAX_BUFFER * sizeof(UChar)); 3486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset --; 3487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((source->pos && (source->pos == source->string || 3489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ((source->flags & UCOL_ITER_INNORMBUF) && 3490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->pos - 1) == 0 && source->fcdPosition == NULL))) 3491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (source->iterator && !source->iterator->hasPrevious(source->iterator))) { 3492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* adds the initial base character to the string */ 3496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(UCharOffset) = schar; 3497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru noChars++; 3498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t offsetBias; 3500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // **** doesn't work if using iterator **** 3502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->flags & 
UCOL_ITER_INNORMBUF) { 3503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru offsetBias = -1; 3504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru offsetBias = (int32_t)(source->pos - source->string); 3506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* a new collIterate is used to simplify things, since using the current 3509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate will mean that the forward and backwards iteration will 3510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru share and change the same buffers. we don't want to get into that. */ 3511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate temp; 3512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t rawOffset; 3513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 351450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, UCharOffset, noChars, &temp, status); 351550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 351650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_NULLORDER; 351750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru temp.flags &= ~UCOL_ITER_NORM; 3519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru temp.flags |= source->flags & UCOL_FORCE_HAN_IMPLICIT; 3520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 352150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho rawOffset = (int32_t)(temp.pos - temp.string); // should always be zero? 
3522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &temp, status); 3523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->extendCEs) { 3525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endCEBuffer = source->extendCEs + source->extendCEsSize; 352650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CECount = (int32_t)((source->CEpos - source->extendCEs)/sizeof(uint32_t)); 3527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE; 352950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CECount = (int32_t)((source->CEpos - source->CEs)/sizeof(uint32_t)); 3530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (CE != UCOL_NO_MORE_CES) { 3533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos ++) = CE; 3534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (offsetBias >= 0) { 353627f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(rawOffset + offsetBias, *status); 3537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CECount++; 3540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->CEpos == endCEBuffer) { 3541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* ran out of CE space, reallocate to new buffer. 
3542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru If reallocation fails, reset pointers and bail out, 3543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru there's no guarantee of the right character position after 3544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru this bail*/ 354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!increaseCEsCapacity(source)) { 3546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 354727f654740f2a26ad62a5c155af9199af9e69b889claireho break; 3548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endCEBuffer = source->extendCEs + source->extendCEsSize; 3551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) { 355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho rawOffset = (int32_t)(temp.fcdPosition - temp.string); 3555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho rawOffset = (int32_t)(temp.pos - temp.string); 3557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &temp, status); 3560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 356227f654740f2a26ad62a5c155af9199af9e69b889claireho if (strbuffer != buffer) { 356327f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(strbuffer); 356427f654740f2a26ad62a5c155af9199af9e69b889claireho } 356527f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(*status)) { 
356627f654740f2a26ad62a5c155af9199af9e69b889claireho return (uint32_t)UCOL_NULLORDER; 356727f654740f2a26ad62a5c155af9199af9e69b889claireho } 356827f654740f2a26ad62a5c155af9199af9e69b889claireho 356927f654740f2a26ad62a5c155af9199af9e69b889claireho if (source->offsetRepeatValue != 0) { 3570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (CECount > noChars) { 357127f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetRepeatCount += temp.offsetRepeatCount; 3572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // **** does this really skip the right offsets? **** 3574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetReturn -= (noChars - CECount); 3575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (offsetBias >= 0) { 3579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetReturn = source->offsetStore - 1; 3580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->offsetReturn == source->offsetBuffer) { 3581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetStore = source->offsetBuffer; 3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos - 1; 3586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->toReturn == source->CEs) { 3587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->CEpos = source->CEs; 3588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
3589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->toReturn); 359150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case LONG_PRIMARY_TAG: 3593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON; 3595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER; 3596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos - 1; 3597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 359827f654740f2a26ad62a5c155af9199af9e69b889claireho if (source->flags & UCOL_ITER_INNORMBUF) { 3599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetRepeatCount = 1; 360027f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 360127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t firstOffset = (int32_t)(source->pos - source->string); 3602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 360327f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset, *status); 360427f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset + 1, *status); 3605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 360627f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetReturn = source->offsetStore - 1; 360727f654740f2a26ad62a5c155af9199af9e69b889claireho *(source->offsetBuffer) = firstOffset; 360827f654740f2a26ad62a5c155af9199af9e69b889claireho if (source->offsetReturn == source->offsetBuffer) { 360927f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetStore = source->offsetBuffer; 361027f654740f2a26ad62a5c155af9199af9e69b889claireho } 
361127f654740f2a26ad62a5c155af9199af9e69b889claireho } 3612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->toReturn); 3615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case EXPANSION_TAG: /* this tag always returns */ 3618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru This should handle expansion. 3621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NOTE: we can encounter both continuations and expansions in an expansion! 3622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru I have to decide where continuations are going to be dealt with 3623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t firstOffset = (int32_t)(source->pos - source->string); 3625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // **** doesn't work if using iterator **** 3627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->offsetReturn != NULL) { 3628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (! 
(source->flags & UCOL_ITER_INNORMBUF) && source->offsetReturn == source->offsetBuffer) { 3629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetStore = source->offsetBuffer; 3630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }else { 3631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru firstOffset = -1; 3632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* find the offset to expansion table */ 3636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE); 3637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru size = getExpansionCount(CE); 3638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (size != 0) { 3639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if there are less than 16 elements in expansion, we don't terminate 3641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t count; 3643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (count = 0; count < size; count++) { 3645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos ++) = *CEOffset++; 3646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (firstOffset >= 0) { 364827f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset + 1, *status); 3649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
3651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* else, we do */ 3653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (*CEOffset != 0) { 3654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos ++) = *CEOffset ++; 3655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (firstOffset >= 0) { 365727f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset + 1, *status); 3658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (firstOffset >= 0) { 3663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetReturn = source->offsetStore - 1; 3664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->offsetBuffer) = firstOffset; 3665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->offsetReturn == source->offsetBuffer) { 3666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetStore = source->offsetBuffer; 3667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->offsetRepeatCount += size - 1; 3670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos - 1; 3673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // in case of one element expansion, we 
3674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // want to immediately return CEpos 3675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source->toReturn == source->CEs) { 3676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->CEpos = source->CEs; 3677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->toReturn); 3680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case DIGIT_TAG: 3683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We do a check to see if we want to collate digits as numbers; if so we generate 3686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru a custom collation key. Otherwise we pull out the value stored in the expansion table. 
3687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i; /* general counter */ 3689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->coll->numericCollation == UCOL_ON){ 3691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t digIndx = 0; 3692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t endIndex = 0; 3693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t leadingZeroIndex = 0; 3694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t trailingZeroCount = 0; 3695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t collateVal = 0; 3697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool nonZeroValReached = FALSE; 3699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2]; // I just need a temporary place to store my generated CEs. 3701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We parse the source string until we hit a char that's NOT a digit. 3703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Use this u_charDigitValue. This might be slow because we have to 3704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru handle surrogates... 
3705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* 3707b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru We need to break up the digit string into collection elements of UCOL_MAX_DIGITS_FOR_NUMBER or less, 3708b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru with any chunks smaller than that being on the right end of the digit string - i.e. the first collation 3709b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru element we process when going backward. To determine how long that chunk might be, we may need to make 3710b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru two passes through the loop that collects digits - one to see how long the string is (and how much is 3711b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru leading zeros) to determine the length of that right-hand chunk, and a second (if the whole string has 3712b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru more than UCOL_MAX_DIGITS_FOR_NUMBER non-leading-zero digits) to actually process that collation 3713b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru element chunk after resetting the state to the initialState at the right side of the digit string. 
3714b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 3715b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t ceLimit = 0; 3716b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar initial_ch = ch; 3717b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collIterateState initialState = {0,0,0,0,0,0,0,0,0}; 3718b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru backupState(source, &initialState); 3719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3720b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(;;) { 3721b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collIterateState state = {0,0,0,0,0,0,0,0,0}; 3722b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 char32 = 0; 3723b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t digVal = 0; 3724b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3725b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U16_IS_TRAIL (ch)) { 3726b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!collIter_bos(source)){ 3727b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar lead = getPrevNormalizedChar(source, status); 3728b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U16_IS_LEAD(lead)) { 3729b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = U16_GET_SUPPLEMENTARY(lead,ch); 3730b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goBackOne(source); 3731b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 3732b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = ch; 3733b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } else { 3738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char32 = ch; 3739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3740b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru digVal = u_charDigitValue(char32); 3741b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3742b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(;;) { 3743b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Make sure we have enough space. No longer needed; 3744b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // at this point the largest value of digIndx when we need to save data in numTempBuf 3745b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // is UCOL_MAX_DIGITS_FOR_NUMBER-1 (digIndx is post-incremented) so we just ensure 3746b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // that numTempBuf is big enough (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2). 3747b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3748b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Skip over trailing zeroes, and keep a count of them. 3749b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (digVal != 0) 3750b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nonZeroValReached = TRUE; 3751b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3752b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (nonZeroValReached) { 3753b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* 3754b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru We parse the digit string into base 100 numbers (this fits into a byte). 
3755b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru We only add to the buffer in twos, thus if we are parsing an odd character, 3756b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru that serves as the 'tens' digit while the if we are parsing an even one, that 3757b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into 3758b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid 3759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less 3760b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru than all the other bytes. 3761b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3762b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru Since we're doing in this reverse we want to put the first digit encountered into the 3763b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ones place and the second digit encountered into the tens place. 3764b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 3765b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3766b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((digIndx + trailingZeroCount) % 2 == 1) { 3767b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // High-order digit case (tens place) 3768b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collateVal += (uint8_t)(digVal * 10); 3769b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3770b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // We cannot set leadingZeroIndex unless it has been set for the 3771b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // low-order digit. 
Therefore, all we can do for the high-order 3772b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // digit is turn it off, never on. 3773b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The only time we will have a high digit without a low is for 3774b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // the very first non-zero digit, so no zero check is necessary. 3775b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (collateVal != 0) 3776b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru leadingZeroIndex = 0; 3777b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3778b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The first pass through, digIndx may exceed the limit, but in that case 3779b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // we no longer care about numTempBuf contents since they will be discarded 3780b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) { 3781b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; 3782b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3783b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collateVal = 0; 3784b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 3785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Low-order digit case (ones place) 3786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collateVal = (uint8_t)digVal; 3787b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Check for leading zeroes. 
3789b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (collateVal == 0) { 3790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!leadingZeroIndex) 3791b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru leadingZeroIndex = (digIndx/2) + 2; 3792b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else 3793b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru leadingZeroIndex = 0; 3794b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3795b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // No need to write to buffer; the case of a last odd digit 3796b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // is handled below. 3797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3798b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++digIndx; 3799b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else 3800b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++trailingZeroCount; 3801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!collIter_bos(source)) { 3803b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = getPrevNormalizedChar(source, status); 3804b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //goBackOne(source); 3805b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U16_IS_TRAIL(ch)) { 3806b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru backupState(source, &state); 3807b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!collIter_bos(source)) { 3808b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goBackOne(source); 3809b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar lead = getPrevNormalizedChar(source, status); 3810b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 3811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U16_IS_LEAD(lead)) { 
3812b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = U16_GET_SUPPLEMENTARY(lead,ch); 3813b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 3814b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru loadState(source, &state, FALSE); 3815b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = ch; 3816b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3817b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else 3819b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char32 = ch; 3820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3821b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((digVal = u_charDigitValue(char32)) == -1 || (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) { 3822b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (char32 > 0xFFFF) {// For surrogates. 3823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loadState(source, &state, FALSE); 3824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3825b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Don't need to "reverse" the goBackOne call, 3826b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // as this points to the next position to process.. 3827b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //if (char32 > 0xFFFF) // For surrogates. 
3828b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //getNextNormalizedChar(source); 3829b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 3830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3832b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goBackOne(source); 3833b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }else 3834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3835b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3837b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_NUMBER) { 3838b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // our collation element is not too big, go ahead and finish with it 3839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 3840b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3841b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // our digit string is too long for a collation element; 3842b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // set the limit for it, reset the state and begin again 3843b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGITS_FOR_NUMBER; 3844b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( ceLimit == 0 ) { 3845b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER; 3846b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 3847b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = initial_ch; 3848b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru loadState(source, &initialState, FALSE); 3849b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru digIndx = endIndex = leadingZeroIndex = trailingZeroCount = 0; 
3850b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collateVal = 0; 3851b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nonZeroValReached = FALSE; 3852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (! nonZeroValReached) { 3855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx = 2; 3856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroCount = 0; 3857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[2] = 6; 3858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((digIndx + trailingZeroCount) % 2 != 0) { 3861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6; 3862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx += 1; // The implicit leading zero 3863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (trailingZeroCount % 2 != 0) { 3865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We had to consume one trailing zero for the low digit 3866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // of the least significant byte 3867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru digIndx += 1; // The trailing zero not in the exponent 3868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailingZeroCount -= 1; 3869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endIndex = leadingZeroIndex ? 
leadingZeroIndex : ((digIndx/2) + 2) ; 3872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Subtract one off of the last byte. Really the first byte here, but it's reversed... 3874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[2] -= 1; 3875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru We want to skip over the first two slots in the buffer. The first slot 3878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the 3879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sign/exponent byte: 0x80 + (decimalPos/2) & 7f. 3880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru The exponent must be adjusted by the number of leading zeroes, and the number of 3881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru trailing zeroes. 3882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[0] = UCOL_CODAN_PLACEHOLDER; 3884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t exponent = (digIndx+trailingZeroCount)/2; 3885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (leadingZeroIndex) 3886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exponent -= ((digIndx/2) + 2 - leadingZeroIndex); 3887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F)); 3888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Now transfer the collation key to our collIterate struct. 
389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The total size for our collation key is half of endIndex, rounded up. 389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t size = (endIndex+1)/2; 389250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!ensureCEsCapacity(source, size)) { 389350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_NULLORDER; 389450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight 3896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight 3897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_BYTE_COMMON; // Tertiary weight. 3898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = endIndex - 1; // Reset the index into the buffer. 3899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i >= 2) { 3900b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t primWeight = numTempBuf[i--] << 8; 3901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( i >= 2) 3902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primWeight |= numTempBuf[i--]; 3903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER; 3904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos -1; 3907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *(source->toReturn); 3908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE); 
3910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(CEOffset++); 3911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/ 3916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 3917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t 3918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7; 3919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t LCount = 19; 3920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t VCount = 21; 3921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const uint32_t TCount = 28; 3922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t NCount = VCount * TCount; /* 588 */ 3923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //const uint32_t SCount = LCount * NCount; /* 11172 */ 3924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t L = ch - SBase; 3926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru divide into pieces. 
3928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru we do it in this order since some compilers can do % and / in one 3929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru operation 3930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t T = L % TCount; 3932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L /= TCount; 3933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t V = L % VCount; 3934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L /= VCount; 3935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* offset them */ 3937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru L += LBase; 3938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru V += VBase; 3939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru T += TBase; 3940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 394127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t firstOffset = (int32_t)(source->pos - source->string); 394227f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset, *status); 3943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * return the first CE, but first put the rest into the expansion buffer 3946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!source->coll->image->jamoSpecial) { 3948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L); 3949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V); 395027f654740f2a26ad62a5c155af9199af9e69b889claireho 
source->appendOffset(firstOffset + 1, *status); 3951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 395227f654740f2a26ad62a5c155af9199af9e69b889claireho if (T != TBase) { 3953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T); 395427f654740f2a26ad62a5c155af9199af9e69b889claireho source->appendOffset(firstOffset + 1, *status); 395527f654740f2a26ad62a5c155af9199af9e69b889claireho } 3956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->toReturn = source->CEpos - 1; 3958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 395927f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetReturn = source->offsetStore - 1; 396027f654740f2a26ad62a5c155af9199af9e69b889claireho if (source->offsetReturn == source->offsetBuffer) { 396127f654740f2a26ad62a5c155af9199af9e69b889claireho source->offsetStore = source->offsetBuffer; 396227f654740f2a26ad62a5c155af9199af9e69b889claireho } 396327f654740f2a26ad62a5c155af9199af9e69b889claireho 396427f654740f2a26ad62a5c155af9199af9e69b889claireho return *(source->toReturn); 3965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Since Hanguls pass the FCD check, it is 3967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // guaranteed that we won't be in 3968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the normalization buffer if something like this happens 3969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Move Jamos into normalization buffer 3970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Move the Jamos into the 3972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru normalization buffer 3973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 
397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *tempbuffer = source->writableBuffer.getBuffer(5); 397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t tempbufferLength; 397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbuffer[0] = 0; 397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbuffer[1] = (UChar)L; 397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbuffer[2] = (UChar)V; 3979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (T != TBase) { 398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbuffer[3] = (UChar)T; 398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbufferLength = 4; 3982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tempbufferLength = 3; 3984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->writableBuffer.releaseBuffer(tempbufferLength); 3986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 3987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 3988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Indicate where to continue in main input string after exhausting 3989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru the writableBuffer 3990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 3991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source->pos == source->string) { 3992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->fcdPosition = NULL; 3993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 3994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->fcdPosition = source->pos-1; 3995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 3996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source->pos = source->writableBuffer.getTerminatedBuffer() + 
tempbufferLength; 3998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->origFlags = source->flags; 3999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags |= UCOL_ITER_INNORMBUF; 4000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); 4001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(UCOL_IGNORABLE); 4003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case IMPLICIT_TAG: /* everything that is not defined otherwise */ 4007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getPrevImplicit(ch, source); 4008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: Remove CJK implicits as they are handled by the getImplicitPrimary function 4010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/ 4011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getPrevImplicit(ch, source); 4012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SURROGATE_TAG: /* This is a surrogate pair */ 401427f654740f2a26ad62a5c155af9199af9e69b889claireho /* essentially an engaged lead surrogate. 
*/ 4015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* if you have encountered it here, it means that a */ 4016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* broken sequence was encountered and this is an error */ 401727f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 4018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case LEAD_SURROGATE_TAG: /* D800-DBFF*/ 402027f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; /* broken surrogate sequence */ 4021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/ 4023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 4024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 cp = 0; 4025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar prevChar; 402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *prev; 4027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isAtStartPrevIterate(source)) { 4028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we are at the start of the string, wrong place to be at */ 402927f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; 4030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (source->pos != source->writableBuffer.getBuffer()) { 4032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prev = source->pos - 1; 4033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prev = source->fcdPosition; 4035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prevChar = *prev; 
4037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Handles Han and Supplementary characters here.*/ 4039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U16_IS_LEAD(prevChar)) { 4040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000)); 4041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru source->pos = prev; 4042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 404327f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_NOT_FOUND; /* like unassigned */ 4044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return getPrevImplicit(cp, source); 4047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* UCA is filled with these. 
Tailorings are NOT_FOUND */ 4050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* not yet implemented */ 4051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CHARSET_TAG: /* this tag always returns */ 4052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* probably after 1.8 */ 4053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_NOT_FOUND; 4054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: /* this tag always returns */ 4056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INTERNAL_PROGRAM_ERROR; 4057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE=0; 4058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (CE <= UCOL_NOT_FOUND) { 4062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE; 4067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This should really be a macro */ 4070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* However, it is used only when stack buffers are not sufficiently big, and then we're messed up performance wise */ 4071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* anyway */ 4072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 4073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queruuint8_t *reallocateBuffer(uint8_t **secondaries, uint8_t *secStart, uint8_t *second, uint32_t *secSize, uint32_t newSize, UErrorCode *status) { 4074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 4075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "."); 4076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 4077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t *newStart = NULL; 407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t offset = (uint32_t)(*secondaries-secStart); 4079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secStart==second) { 4081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newStart=(uint8_t*)uprv_malloc(newSize); 4082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newStart==NULL) { 4083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 4084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 4085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(newStart, secStart, *secondaries-secStart); 4087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newStart=(uint8_t*)uprv_realloc(secStart, newSize); 4089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newStart==NULL) { 4090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 4091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Since we're reallocating, return original reference so we don't loose it. 
*/ 4092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return secStart; 4093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries=newStart+offset; 4096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secSize=newSize; 4097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return newStart; 4098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This should really be a macro */ 4102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This function is used to reverse parts of a buffer. We need this operation when doing continuation */ 4103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* secondaries in French */ 4104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 4105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) { 4106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t temp; 4107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(start<end) { 4108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp = *start; 4109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *start++ = *end; 4110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *end-- = temp; 4111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 4114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define uprv_ucol_reverse_buffer(TYPE, start, end) { \ 
4116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TYPE tempA; \ 4117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruwhile((start)<(end)) { \ 4118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempA = *(start); \ 4119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(start)++ = *(end); \ 4120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(end)-- = tempA; \ 4121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} \ 4122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 4125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the sortkey generation functions */ 4126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 4127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 4128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 4130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Merge two sort keys. 4131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is useful, for example, to combine sort keys from first and last names 4132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to sort such pairs. 4133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Merged sort keys consider on each collation level the first part first entirely, 4134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then the second one. 
4135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is possible to merge multiple sort keys by consecutively merging 4136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * another one with the intermediate result. 4137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 4138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The length of the merge result is the sum of the lengths of the input sort keys 4139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * minus 1. 4140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 4141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src1 the first sort key 4142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src1Length the length of the first sort key, including the zero byte at the end; 4143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * can be -1 if the function is to find the length 4144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src2 the second sort key 4145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src2Length the length of the second sort key, including the zero byte at the end; 4146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * can be -1 if the function is to find the length 4147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest the buffer where the merged sort key is written, 4148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * can be NULL if destCapacity==0 4149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param destCapacity the number of bytes in the dest buffer 4150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the length of the merged sort key, src1Length+src2Length-1; 4151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), 
4152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in which cases the contents of dest is undefined 4153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 4154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @draft 4155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 4156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 4157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 4158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *src2, int32_t src2Length, 4159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dest, int32_t destCapacity) { 4160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destLength; 4161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t b; 4162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check arguments */ 4164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( src1==NULL || src1Length<-2 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) || 4165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src2==NULL || src2Length<-2 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) || 4166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destCapacity<0 || (destCapacity>0 && dest==NULL) 4167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 4168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* error, attempt to write a zero byte and return 0 */ 4169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dest!=NULL && destCapacity>0) { 4170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest=0; 4171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 
4173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check lengths and capacity */ 4176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(src1Length<0) { 4177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src1Length=(int32_t)uprv_strlen((const char *)src1)+1; 4178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(src2Length<0) { 4180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src2Length=(int32_t)uprv_strlen((const char *)src2)+1; 4181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=src1Length+src2Length-1; 4184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destLength>destCapacity) { 4185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the merged sort key does not fit into the destination */ 4186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destLength; 4187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* merge the sort keys with the same number of levels */ 4190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(*src1!=0 && *src2!=0) { /* while both have another level */ 4191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy level from src1 not including 00 or 01 */ 4192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((b=*src1)>=2) { 4193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++src1; 4194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=b; 
4195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* add a 02 merge separator */ 4198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=2; 4199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy level from src2 not including 00 or 01 */ 4201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((b=*src2)>=2) { 4202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++src2; 4203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=b; 4204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if both sort keys have another level, then add a 01 level separator and continue */ 4207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*src1==1 && *src2==1) { 4208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++src1; 4209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++src2; 4210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=1; 4211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 4215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * here, at least one sort key is finished now, but the other one 4216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * might have some contents left from containing more levels; 4217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that contents is just appended to the result 4218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 
4219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*src1!=0) { 4220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* src1 is not finished, therefore *src2==0, and src1 is appended */ 4221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src2=src1; 4222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* append src2, "the other, unfinished sort key" */ 4224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy((char *)dest, (const char *)src2); 4225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trust that neither sort key contained illegally embedded zero bytes */ 4227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destLength; 4228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* sortkey API */ 4231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 4232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getSortKey(const UCollator *coll, 4233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 4234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 4235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *result, 4236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t resultLength) 4237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 4238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY); 4239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 4240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", 
coll, source, 4241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength)); 4242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t keySize = 0; 4246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source != NULL) { 4248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // source == NULL is actually an error situation, but we would need to 4249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // have an error code to return it. Until we introduce a new 4250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // API, it stays like this 4251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* this uses the function pointer that is set in updateinternalstate */ 4253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* currently, there are two funcs: */ 4254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /*ucol_calcSortKey(...);*/ 4255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /*ucol_calcSortKeySimpleTertiary(...);*/ 4256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru keySize = coll->sortKeyGen(coll, source, sourceLength, &result, resultLength, FALSE, &status); 4258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR && result && resultLength > 0) { 4259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // That's not good. Something unusual happened. 
4260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We don't know how much we initialized before we failed. 4261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // NULL terminate for safety. 4262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have no way say that we have generated a partial sort key. 4263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //result[0] = 0; 4264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //keySize = 0; 4265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //} 4266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); 4268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(status); 4269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return keySize; 4270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this function is called by the C++ API for sortkey generation */ 4273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 4274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getSortKeyWithAllocation(const UCollator *coll, 4275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, int32_t sourceLength, 4276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t **pResult, 4277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 4278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pResult = 0; 4279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->sortKeyGen(coll, source, sourceLength, pResult, 0, TRUE, pErrorCode); 4280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
4281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCOL_FSEC_BUF_SIZE 256 4283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 428427f654740f2a26ad62a5c155af9199af9e69b889claireho// Is this primary weight compressible? 428527f654740f2a26ad62a5c155af9199af9e69b889claireho// Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit). 428627f654740f2a26ad62a5c155af9199af9e69b889claireho// TODO: This should use per-lead-byte flags from FractionalUCA.txt. 428727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline UBool 428827f654740f2a26ad62a5c155af9199af9e69b889clairehoisCompressible(const UCollator * /*coll*/, uint8_t primary1) { 428927f654740f2a26ad62a5c155af9199af9e69b889claireho return UCOL_BYTE_FIRST_NON_LATIN_PRIMARY <= primary1 && primary1 <= maxRegularPrimary; 429027f654740f2a26ad62a5c155af9199af9e69b889claireho} 429127f654740f2a26ad62a5c155af9199af9e69b889claireho 4292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This function tries to get the size of a sortkey. 
It will be invoked if the size of resulting buffer is 0 */ 4293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* or if we run out of space while making a sortkey and want to return ASAP */ 4294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) { 4295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts); 4297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF); 4298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF); 4299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF); 4300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool compareIdent = (strength == UCOL_IDENTICAL); 4301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doCase = (coll->caseLevel == UCOL_ON); 4302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool shifted = (coll->alternateHandling == UCOL_SHIFTED); 4303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UBool qShifted = shifted && (compareQuad == 0); 4304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0); 4305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0); 4306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t fSecsBuff[UCOL_FSEC_BUF_SIZE]; 4307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *fSecs = fSecsBuff; 
4308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t fSecsLen = 0, fSecsMaxLen = UCOL_FSEC_BUF_SIZE; 4309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *frenchStartPtr = NULL, *frenchEndPtr = NULL; 4310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t variableTopValue = coll->variableTopValue; 4312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1); 4313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(doHiragana) { 4314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_COMMON_BOT4++; 4315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* allocate one more space for hiragana */ 4316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4); 4318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t order = UCOL_NO_MORE_CES; 4320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary1 = 0; 4321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary2 = 0; 4322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t secondary = 0; 4323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiary = 0; 4324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t caseShift = 0; 4325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t c2 = 0, c3 = 0, c4 = 0; /* variables for compression */ 4326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseSwitch = coll->caseSwitch; 4328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryMask = 
coll->tertiaryMask; 4329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryCommon = coll->tertiaryCommon; 4330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasShifted = FALSE; 4332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool notIsContinuation = FALSE; 4333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadPrimary = 0; 4334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 4337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = ucol_IGetNextCE(coll, s, &status); 4338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(order == UCOL_NO_MORE_CES) { 4339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(order == 0) { 4343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notIsContinuation = !isContinuation(order); 4347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 4350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tertiary = (uint8_t)((order & UCOL_BYTE_SIZE_MASK)); 4351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION)); 
4353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 4355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 4356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primary1 = (uint8_t)(order >> 8); 4357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 435827f654740f2a26ad62a5c155af9199af9e69b889claireho /* no need to permute since the actual code values don't matter 435927f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->leadBytePermutationTable != NULL && notIsContinuation) { 436027f654740f2a26ad62a5c155af9199af9e69b889claireho primary1 = coll->leadBytePermutationTable[primary1]; 436127f654740f2a26ad62a5c155af9199af9e69b889claireho } 436227f654740f2a26ad62a5c155af9199af9e69b889claireho */ 4363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436427f654740f2a26ad62a5c155af9199af9e69b889claireho if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0) 436527f654740f2a26ad62a5c155af9199af9e69b889claireho || (!notIsContinuation && wasShifted))) 4366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru || (wasShifted && primary1 == 0)) { /* amendment to the UCA says that primary ignorables */ 4367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* and other ignorables should be removed if following a shifted code point */ 4368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */ 4369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we should just completely ignore it */ 4370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 4371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareQuad == 0) { 
4373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(c4 > 0) { 4374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c2/UCOL_BOT_COUNT4)+1; 4375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c4 = 0; 4376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 != 0) { 4379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru wasShifted = TRUE; 4383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru wasShifted = FALSE; 4385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. 
*/ 438627f654740f2a26ad62a5c155af9199af9e69b889claireho /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */ 4387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* calculate sortkey size */ 4388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(primary1 != UCOL_IGNORABLE) { 4389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 4390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(leadPrimary == primary1) { 4391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(leadPrimary != 0) { 4394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 == UCOL_IGNORABLE) { 4397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* one byter, not compressed */ 4398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru leadPrimary = 0; 440027f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(isCompressible(coll, primary1)) { 440127f654740f2a26ad62a5c155af9199af9e69b889claireho /* compress */ 4402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru leadPrimary = primary1; 4403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize+=2; 440427f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 440527f654740f2a26ad62a5c155af9199af9e69b889claireho leadPrimary = 0; 440627f654740f2a26ad62a5c155af9199af9e69b889claireho currentSize+=2; 4407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */ 4410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 != UCOL_IGNORABLE) { 4412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secondary > compareSec) { /* I think that != 0 test should be != IGNORABLE */ 4418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isFrenchSec){ 4419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary == UCOL_COMMON2 && notIsContinuation) { 4420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c2++; 4421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(c2 > 0) { 4423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary > UCOL_COMMON2) { // not necessary for 4th level. 
4424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c2/(uint32_t)UCOL_TOP_COUNT2)+1; 4425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+1; 4427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c2 = 0; 4429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSecs[fSecsLen++] = secondary; 4434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(fSecsLen == fSecsMaxLen) { 4435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t *fSecsTemp; 4436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(fSecs == fSecsBuff) { 4437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSecsTemp = (uint8_t *)uprv_malloc(2*fSecsLen); 4438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSecsTemp = (uint8_t *)uprv_realloc(fSecs, 2*fSecsLen); 4440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(fSecsTemp == NULL) { 4442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 4443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 4444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSecs = fSecsTemp; 4446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSecsMaxLen *= 2; 
4447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 4449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (frenchStartPtr != NULL) { 4450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* reverse secondaries from frenchStartPtr up to frenchEndPtr */ 4451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr); 4452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchStartPtr = NULL; 4453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (frenchStartPtr == NULL) { 4456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchStartPtr = fSecs+fSecsLen-2; 4457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchEndPtr = fSecs+fSecsLen-1; 4459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) { 4464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do the case level if we need to do it. 
We don't want to calculate 4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // case level for primary ignorables if we have only primary strength and case level 4466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise we would break well formedness of CEs 4467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (caseShift == 0) { 4468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru currentSize++; 4469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru caseShift = UCOL_CASE_SHIFT_START; 4470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tertiary&0x3F) > 0 && notIsContinuation) { 4472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift--; 4473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tertiary &0xC0) != 0) { 4474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (caseShift == 0) { 4475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift = UCOL_CASE_SHIFT_START; 4477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift--; 4479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 4483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary ^= caseSwitch; 4484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tertiary &= tertiaryMask; 
4488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(tertiary > compareTer) { /* I think that != 0 test should be != IGNORABLE */ 4489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (tertiary == tertiaryCommon && notIsContinuation) { 4490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c3++; 4491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(c3 > 0) { 4493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) 4494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST)) { 4495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c3/(uint32_t)coll->tertiaryTopCount)+1; 4496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c3/(uint32_t)coll->tertiaryBottomCount)+1; 4498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c3 = 0; 4500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(/*qShifted*/(compareQuad==0) && notIsContinuation) { 4506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(s->flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it 4507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(c4>0) { // Close this part 4508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += 
(c4/UCOL_BOT_COUNT4)+1; 4509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c4 = 0; 4510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; // Add the Hiragana 4512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // This wasn't Hiragana, so we can continue adding stuff 4513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c4++; 4514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isFrenchSec){ 4520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(c2 > 0) { 4521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0); 4522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i = 0; 4525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(frenchStartPtr != NULL) { 4526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr); 4527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 0; i<fSecsLen; i++) { 4529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secondary = *(fSecs+fSecsLen-i-1); 4530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This is compression code. 
*/ 4531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary == UCOL_COMMON2) { 4532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ++c2; 4533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(c2 > 0) { 4535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary > UCOL_COMMON2) { // not necessary for 4th level. 4536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c2/(uint32_t)UCOL_TOP_COUNT2)+((c2%(uint32_t)UCOL_TOP_COUNT2 != 0)?1:0); 4537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0); 4539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c2 = 0; 4541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize++; 4543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(c2 > 0) { 4546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0); 4547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(fSecs != fSecsBuff) { 4549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(fSecs); 4550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c3 > 0) { 
4554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c3/(uint32_t)coll->tertiaryBottomCount) + ((c3%(uint32_t)coll->tertiaryBottomCount != 0)?1:0); 4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c4 > 0 && compareQuad == 0) { 4558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += (c4/(uint32_t)UCOL_BOT_COUNT4)+((c4%(uint32_t)UCOL_BOT_COUNT4 != 0)?1:0); 4559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(compareIdent) { 4562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru currentSize += u_lengthOfIdenticalLevelRun(s->string, len); 4563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return currentSize; 4565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 4568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void doCaseShift(uint8_t **cases, uint32_t &caseShift) { 4569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (caseShift == 0) { 4570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(*cases)++ = UCOL_CASE_BYTE_START; 4571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift = UCOL_CASE_SHIFT_START; 4572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Adds a value to the buffer if it's safe to add. 
// Increments the number of added values, so that we
// know how many values we wanted to add, even if we didn't add them all.
//
// size is always incremented. value is stored at primaries, and primaries is
// advanced by one, only while primaries is still below limit.
static
inline void addWithIncrement(uint8_t *&primaries, uint8_t *limit, uint32_t &size, const uint8_t value) {
    ++size;
    if(primaries >= limit) {
        /* no room left: only the requested count is recorded */
        return;
    }
    *primaries = value;
    ++primaries;
}

// Packs the secondary buffer when processing French locale. Adds the terminator.
4586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 4587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint8_t *packFrench(uint8_t *primaries, uint8_t *primEnd, uint8_t *secondaries, uint32_t *secsize, uint8_t *frenchStartPtr, uint8_t *frenchEndPtr) { 4588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t secondary; 4589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t count2 = 0; 4590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i = 0, size = 0; 4591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we use i here since the key size already accounts for terminators, so we'll discard the increment 4592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addWithIncrement(primaries, primEnd, i, UCOL_LEVELTERMINATOR); 4593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* If there are any unresolved continuation secondaries, reverse them here so that we can reverse the whole secondary thing */ 4594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(frenchStartPtr != NULL) { 4595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr); 4596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 0; i<*secsize; i++) { 4598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secondary = *(secondaries-i-1); 4599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This is compression code. 
*/ 4600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary == UCOL_COMMON2) { 4601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ++count2; 4602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 4604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary > UCOL_COMMON2) { // not necessary for 4th level. 4605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_TOP_COUNT2) { 4606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2)); 4607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_TOP_COUNT2; 4608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_TOP2 - (count2-1))); 4610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 4612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); 4613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 4614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); 4616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 = 0; 4618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addWithIncrement(primaries, primEnd, size, secondary); 
4620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 4623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); 4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); 4628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secsize = size; 4630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return primaries; 4631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0 4634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the sortkey work horse function */ 4636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t U_CALLCONV 4637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_calcSortKey(const UCollator *coll, 4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 4639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 4640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t **result, 4641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t resultLength, 
4642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool allocateSKBuffer, 4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 4644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts); 4646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t i = 0; /* general purpose counter */ 4648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Stack allocated buffers for buffers we use */ 4650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t prim[UCOL_PRIMARY_MAX_BUFFER], second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER], caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BUFFER]; 4651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *primaries = *result, *secondaries = second, *tertiaries = tert, *cases = caseB, *quads = quad; 4653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 4655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 4656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(primaries == NULL && allocateSKBuffer == TRUE) { 4659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru primaries = *result = prim; 4660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultLength = UCOL_PRIMARY_MAX_BUFFER; 4661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
4662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t secSize = UCOL_SECONDARY_MAX_BUFFER, terSize = UCOL_TERTIARY_MAX_BUFFER, 4664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru caseSize = UCOL_CASE_MAX_BUFFER, quadSize = UCOL_QUAD_MAX_BUFFER; 4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sortKeySize = 1; /* it is always \0 terminated */ 4667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 466850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString normSource; 4669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength); 4671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColAttributeValue strength = coll->strength; 4673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF); 4675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF); 4676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF); 4677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool compareIdent = (strength == UCOL_IDENTICAL); 4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doCase = (coll->caseLevel == UCOL_ON); 4679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0); 4680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool shifted = (coll->alternateHandling == UCOL_SHIFTED); 
4681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UBool qShifted = shifted && (compareQuad == 0); 4682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0); 4683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t variableTopValue = coll->variableTopValue; 4685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no 4686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // qShifted, we don't need to set UCOL_COMMON_BOT4 so high. 4687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1); 4688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_HIRAGANA_QUAD = 0; 4689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(doHiragana) { 4690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++; 4691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* allocate one more space for hiragana, value for hiragana */ 4692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4); 4694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* support for special features like caselevel and funky secondaries */ 4696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *frenchStartPtr = NULL; 4697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *frenchEndPtr = NULL; 4698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t caseShift = 0; 4699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
4700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + /*(qShifted?1:0)*/(compareQuad?0:1) + (compareIdent?1:0)); 4701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* If we need to normalize, we'll do it all at once at the beginning! */ 470350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *norm2; 4704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(compareIdent) { 470550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2 = Normalizer2Factory::getNFDInstance(*status); 4706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(coll->normalizationMode != UCOL_OFF) { 470750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2 = Normalizer2Factory::getFCDInstance(*status); 4708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 470950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2 = NULL; 471050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 471150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norm2 != NULL) { 471250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normSource.setTo(FALSE, source, len); 471350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status); 471450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(qcYesLength != len) { 471550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString unnormalized = normSource.tempSubString(qcYesLength); 471650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normSource.truncate(qcYesLength); 471750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2->normalizeSecondAndAppend(normSource, unnormalized, *status); 471850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source = normSource.getBuffer(); 471950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len = normSource.length(); 4720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
4721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate s; 472350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, source, len, &s, status); 472450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 472550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 472650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 472727f654740f2a26ad62a5c155af9199af9e69b889claireho s.flags &= ~UCOL_ITER_NORM; // source passed the FCD test or else was normalized. 4728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(resultLength == 0 || primaries == NULL) { 473050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_getSortKeySize(coll, &s, sortKeySize, strength, len); 4731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *primarySafeEnd = primaries + resultLength - 1; 4733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strength > UCOL_PRIMARY) { 4734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru primarySafeEnd--; 4735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t minBufferSize = UCOL_MAX_BUFFER; 4738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *primStart = primaries; 4740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *secStart = secondaries; 4741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *terStart = tertiaries; 4742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *caseStart = cases; 4743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *quadStart = 
quads; 4744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t order = 0; 4746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary1 = 0; 4748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary2 = 0; 4749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t secondary = 0; 4750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiary = 0; 4751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseSwitch = coll->caseSwitch; 4752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryMask = coll->tertiaryMask; 4753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int8_t tertiaryAddition = coll->tertiaryAddition; 4754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryTop = coll->tertiaryTop; 4755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryBottom = coll->tertiaryBottom; 4756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryCommon = coll->tertiaryCommon; 4757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseBits = 0; 4758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool finished = FALSE; 4760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasShifted = FALSE; 4761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool notIsContinuation = FALSE; 4762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t prevBuffSize = 0; 4764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t count2 = 0, count3 = 0, count4 = 0; 
4766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadPrimary = 0; 4767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 4769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=prevBuffSize; i<minBufferSize; ++i) { 4770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru order = ucol_IGetNextCE(coll, &s, status); 4772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(order == UCOL_NO_MORE_CES) { 4773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru finished = TRUE; 4774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(order == 0) { 4778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 4779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru notIsContinuation = !isContinuation(order); 4782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(notIsContinuation) { 4784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK); 4785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION)); 4787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 
4790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 4791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru primary1 = (uint8_t)(order >> 8); 4792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 479327f654740f2a26ad62a5c155af9199af9e69b889claireho uint8_t originalPrimary1 = primary1; 479427f654740f2a26ad62a5c155af9199af9e69b889claireho if(notIsContinuation && coll->leadBytePermutationTable != NULL) { 479527f654740f2a26ad62a5c155af9199af9e69b889claireho primary1 = coll->leadBytePermutationTable[primary1]; 479627f654740f2a26ad62a5c155af9199af9e69b889claireho } 4797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 479827f654740f2a26ad62a5c155af9199af9e69b889claireho if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0) 479927f654740f2a26ad62a5c155af9199af9e69b889claireho || (!notIsContinuation && wasShifted))) 4800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */ 4801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 4802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* and other ignorables should be removed if following a shifted code point */ 4803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */ 4804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we should just completely ignore it */ 4805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 4806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareQuad == 0) { 4808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(count4 > 0) { 4809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count4 > UCOL_BOT_COUNT4) { 
4810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); 4811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count4 -= UCOL_BOT_COUNT4; 4812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1)); 4814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count4 = 0; 4815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We are dealing with a variable and we're treating them as shifted */ 4817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This is a shifted ignorable */ 4818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary1 != 0) { /* we need to check this since we could be in continuation */ 4819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = primary1; 4820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 != 0) { 4822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = primary2; 4823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru wasShifted = TRUE; 4826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru wasShifted = FALSE; 4828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. 
*/ 482927f654740f2a26ad62a5c155af9199af9e69b889claireho /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */ 4830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* regular and simple sortkey calc */ 4831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary1 != UCOL_IGNORABLE) { 4832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 4833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(leadPrimary == primary1) { 4834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary2; 4835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(leadPrimary != 0) { 4837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = (uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN); 4838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 == UCOL_IGNORABLE) { 4840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* one byter, not compressed */ 4841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary1; 4842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru leadPrimary = 0; 484327f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(isCompressible(coll, originalPrimary1)) { 484427f654740f2a26ad62a5c155af9199af9e69b889claireho /* compress */ 4845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = leadPrimary = primary1; 4846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primaries <= primarySafeEnd) { 4847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary2; 4848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 484927f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 
485027f654740f2a26ad62a5c155af9199af9e69b889claireho leadPrimary = 0; 485127f654740f2a26ad62a5c155af9199af9e69b889claireho *primaries++ = primary1; 485227f654740f2a26ad62a5c155af9199af9e69b889claireho if(primaries <= primarySafeEnd) { 485327f654740f2a26ad62a5c155af9199af9e69b889claireho *primaries++ = primary2; 485427f654740f2a26ad62a5c155af9199af9e69b889claireho } 4855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */ 4858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *primaries++ = primary1; 4859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((primary2 != UCOL_IGNORABLE) && (primaries <= primarySafeEnd)) { 4860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary2; /* second part */ 4861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secondary > compareSec) { 4866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isFrenchSec) { 4867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This is compression code. 
*/ 4868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary == UCOL_COMMON2 && notIsContinuation) { 4869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ++count2; 4870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 4872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary > UCOL_COMMON2) { // not necessary for 4th level. 4873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_TOP_COUNT2) { 4874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2); 4875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_TOP_COUNT2; 4876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - (count2-1)); 4878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 4880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); 4881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 4882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)); 4884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 = 0; 4886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = secondary; 4888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
4889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = secondary; 4891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Do the special handling for French secondaries */ 4892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We need to get continuation elements and do intermediate restore */ 4893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* abc1c2c3de with french secondaries need to be edc1c2c3ba NOT edc3c2c1ba */ 4894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 4895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (frenchStartPtr != NULL) { 4896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* reverse secondaries from frenchStartPtr up to frenchEndPtr */ 4897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr); 4898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchStartPtr = NULL; 4899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (frenchStartPtr == NULL) { 4902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchStartPtr = secondaries - 2; 4903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchEndPtr = secondaries-1; 4905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) { 
4910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // do the case level if we need to do it. We don't want to calculate 4911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // case level for primary ignorables if we have only primary strength and case level 4912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise we would break well formedness of CEs 4913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru doCaseShift(&cases, caseShift); 4914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 4915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseBits = (uint8_t)(tertiary & 0xC0); 4916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tertiary != 0) { 4918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseFirst == UCOL_UPPER_FIRST) { 4919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((caseBits & 0xC0) == 0) { 4920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(cases-1) |= 1 << (--caseShift); 4921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(cases-1) |= 0 << (--caseShift); 4923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* second bit */ 4924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru doCaseShift(&cases, caseShift); 4925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(cases-1) |= ((caseBits>>6)&1) << (--caseShift); 4926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((caseBits & 0xC0) == 0) { 4929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(cases-1) |= 0 << (--caseShift); 4930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
4931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(cases-1) |= 1 << (--caseShift); 4932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* second bit */ 4933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru doCaseShift(&cases, caseShift); 4934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(cases-1) |= ((caseBits>>7)&1) << (--caseShift); 4935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 4942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary ^= caseSwitch; 4943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary &= tertiaryMask; 4947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tertiary > compareTer) { 4948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This is compression code. 
*/ 4949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* sequence size check is included in the if clause */ 4950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (tertiary == tertiaryCommon && notIsContinuation) { 4951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ++count3; 4952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) { 4954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary += tertiaryAddition; 4955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) { 4956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary -= tertiaryAddition; 4957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count3 > 0) { 4959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((tertiary > tertiaryCommon)) { 4960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 > coll->tertiaryTopCount) { 4961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount); 4962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryTopCount; 4963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryTop - (count3-1)); 4965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 > coll->tertiaryBottomCount) { 4967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount); 
4968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryBottomCount; 4969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1)); 4971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 = 0; 4973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = tertiary; 4975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(/*qShifted*/(compareQuad==0) && notIsContinuation) { 4979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it 4980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(count4>0) { // Close this part 4981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count4 > UCOL_BOT_COUNT4) { 4982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); 4983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count4 -= UCOL_BOT_COUNT4; 4984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1)); 4986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count4 = 0; 4987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = UCOL_HIRAGANA_QUAD; // Add the Hiragana 4989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // This wasn't 
Hiragana, so we can continue adding stuff 4990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count4++; 4991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(primaries > primarySafeEnd) { /* We have stepped over the primary buffer */ 4996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == FALSE) { /* need to save our butts if we cannot reallocate */ 499750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, (UChar *)source, len, &s, status); 499850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 499950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 500050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho finished = TRUE; 500150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 500250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 500327f654740f2a26ad62a5c155af9199af9e69b889claireho s.flags &= ~UCOL_ITER_NORM; 5004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len); 5005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru finished = TRUE; 5007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* It's much nicer if we can actually reallocate */ 500950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sks = sortKeySize+(int32_t)((primaries - primStart)+(secondaries - secStart)+(tertiaries - terStart)+(cases-caseStart)+(quads-quadStart)); 5010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sks, status); 5011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primarySafeEnd = primStart + resultLength - 1; 5014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength > UCOL_PRIMARY) { 5015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primarySafeEnd--; 5016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. */ 5019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru finished = TRUE; 5021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(finished) { 5027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prevBuffSize = minBufferSize; 5030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t frenchStartOffset = 0, frenchEndOffset = 0; 5032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (frenchStartPtr != NULL) { 503350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho frenchStartOffset = 
(uint32_t)(frenchStartPtr - secStart); 503450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho frenchEndOffset = (uint32_t)(frenchEndPtr - secStart); 5035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secStart = reallocateBuffer(&secondaries, secStart, second, &secSize, 2*secSize, status); 5037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terStart = reallocateBuffer(&tertiaries, terStart, tert, &terSize, 2*terSize, status); 5038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseStart = reallocateBuffer(&cases, caseStart, caseB, &caseSize, 2*caseSize, status); 5039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru quadStart = reallocateBuffer(&quads, quadStart, quad, &quadSize, 2*quadSize, status); 5040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 5041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. 
*/ 5042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (frenchStartPtr != NULL) { 5046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchStartPtr = secStart + frenchStartOffset; 5047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchEndPtr = secStart + frenchEndOffset; 5048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru minBufferSize *= 2; 5050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Here, we are generally done with processing */ 5054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* bailing out would not be too productive */ 5055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(*status)) { 505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sortKeySize += (uint32_t)(primaries - primStart); 5058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we have done all the CE's, now let's put them together to form a key */ 5059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareSec == 0) { 5060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 5061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 5062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); 5063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= 
(uint32_t)UCOL_BOT_COUNT2; 5064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)); 5066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 506750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t secsize = (uint32_t)(secondaries-secStart); 5068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isFrenchSec) { // Regular situation, we know the length of secondaries 5069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize += secsize; 5070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sortKeySize <= resultLength) { 5071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, secStart, secsize); 5073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries += secsize; 5074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { /* need to save our butts if we cannot reallocate */ 5076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status); 5077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, secStart, secsize); 5081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries += secsize; 5082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 
5084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. */ 5085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // French secondary is on. We will need to pack French. packFrench will add the level terminator 5093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t *newPrim = packFrench(primaries, primStart+resultLength, secondaries, &secsize, frenchStartPtr, frenchEndPtr); 5094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize += secsize; 5095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sortKeySize <= resultLength) { // if we managed to pack fine 5096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries = newPrim; // update the primary pointer 5097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // overflow, need to reallocate and redo 5098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { /* need to save our butts if we cannot reallocate */ 5099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status); 5100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries = packFrench(primaries, primStart+resultLength, 
secondaries, &secsize, frenchStartPtr, frenchEndPtr); 5102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 5104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. */ 5105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(doCase) { 511650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t casesize = (uint32_t)(cases - caseStart); 5117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize += casesize; 5118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sortKeySize <= resultLength) { 5119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, caseStart, casesize); 5121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries += casesize; 5122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { 5124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, 
*result, prim, &resultLength, 2*sortKeySize, status); 5125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, caseStart, casesize); 5129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 5131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. */ 5132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareTer == 0) { 5142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count3 > 0) { 5143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (coll->tertiaryCommon != UCOL_COMMON_BOT3) { 5144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 >= coll->tertiaryTopCount) { 5145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount); 5146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= 
(uint32_t)coll->tertiaryTopCount; 5147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryTop - count3); 5149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 > coll->tertiaryBottomCount) { 5151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount); 5152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryBottomCount; 5153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1)); 5155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 515750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t tersize = (uint32_t)(tertiaries - terStart); 5158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize += tersize; 5159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sortKeySize <= resultLength) { 5160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, terStart, tersize); 5162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries += tersize; 5163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { 5165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status); 5166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 
5167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, terStart, tersize); 5170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 5172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. */ 5173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareQuad == 0/*qShifted == TRUE*/) { 5182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(count4 > 0) { 5183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count4 > UCOL_BOT_COUNT4) { 5184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); 5185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count4 -= UCOL_BOT_COUNT4; 5186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1)); 5188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 518950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t quadsize = 
(uint32_t)(quads - quadStart); 5190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize += quadsize; 5191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sortKeySize <= resultLength) { 5192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, quadStart, quadsize); 5194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries += quadsize; 5195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { 5197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status); 5198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, quadStart, quadsize); 5202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 5204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. 
*/ 5205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(compareIdent) { 5215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize += u_lengthOfIdenticalLevelRun(s.string, len); 5216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sortKeySize <= resultLength) { 5217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries += u_writeIdenticalLevelRun(s.string, len, primaries); 5219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { 5221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, sortKeySize, status); 5222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru u_writeIdenticalLevelRun(s.string, len, primaries); 
5226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 5228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. */ 5229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = '\0'; 5239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { 5242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = (uint8_t*)uprv_malloc(sortKeySize); 5243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* test for NULL */ 5244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (*result == NULL) { 5245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 5246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(*result, primStart, sortKeySize); 5249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primStart != prim) { 
5250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(primStart); 5251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querucleanup: 5255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (allocateSKBuffer == FALSE && resultLength > 0 && U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) { 5256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* NULL terminate for safety */ 5257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru **result = 0; 5258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(terStart != tert) { 5260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(terStart); 5261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(secStart); 5262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(caseStart); 5263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(quadStart); 5264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* To avoid memory leak, free the offset buffer if necessary. 
*/ 5267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_freeOffsetBuffer(&s); 5268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return sortKeySize; 5270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 5271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t U_CALLCONV 5274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_calcSortKeySimpleTertiary(const UCollator *coll, 5275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 5276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 5277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t **result, 5278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t resultLength, 5279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool allocateSKBuffer, 5280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 5281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 5282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ALIGN_CODE(16); 5283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts); 5285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t i = 0; /* general purpose counter */ 5286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Stack allocated buffers for buffers we use */ 5288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t prim[UCOL_PRIMARY_MAX_BUFFER], second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER]; 
5289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *primaries = *result, *secondaries = second, *tertiaries = tert; 5291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 5293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(primaries == NULL && allocateSKBuffer == TRUE) { 5297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru primaries = *result = prim; 5298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultLength = UCOL_PRIMARY_MAX_BUFFER; 5299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t secSize = UCOL_SECONDARY_MAX_BUFFER, terSize = UCOL_TERTIARY_MAX_BUFFER; 5302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sortKeySize = 3; /* it is always \0 terminated plus separators for secondary and tertiary */ 5304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 530550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString normSource; 5306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = sourceLength; 5308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* If we need to normalize, we'll do it all at once at the beginning! 
*/ 531050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(coll->normalizationMode != UCOL_OFF) { 531150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normSource.setTo(len < 0, source, len); 531250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *norm2 = Normalizer2Factory::getFCDInstance(*status); 531350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status); 531450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(qcYesLength != normSource.length()) { 531550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString unnormalized = normSource.tempSubString(qcYesLength); 531650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normSource.truncate(qcYesLength); 531750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2->normalizeSecondAndAppend(normSource, unnormalized, *status); 531850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source = normSource.getBuffer(); 531950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len = normSource.length(); 5320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate s; 532350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, (UChar *)source, len, &s, status); 532450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 532550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 532650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 532727f654740f2a26ad62a5c155af9199af9e69b889claireho s.flags &= ~UCOL_ITER_NORM; // source passed the FCD test or else was normalized. 
5328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(resultLength == 0 || primaries == NULL) { 533050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len); 5331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *primarySafeEnd = primaries + resultLength - 2; 5334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t minBufferSize = UCOL_MAX_BUFFER; 5336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *primStart = primaries; 5338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *secStart = secondaries; 5339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *terStart = tertiaries; 5340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t order = 0; 5342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary1 = 0; 5344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t primary2 = 0; 5345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t secondary = 0; 5346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiary = 0; 5347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseSwitch = coll->caseSwitch; 5348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryMask = coll->tertiaryMask; 5349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int8_t tertiaryAddition = coll->tertiaryAddition; 
5350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryTop = coll->tertiaryTop; 5351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryBottom = coll->tertiaryBottom; 5352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryCommon = coll->tertiaryCommon; 5353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t prevBuffSize = 0; 5355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool finished = FALSE; 5357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool notIsContinuation = FALSE; 5358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t count2 = 0, count3 = 0; 5360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t leadPrimary = 0; 5361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 5363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=prevBuffSize; i<minBufferSize; ++i) { 5364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru order = ucol_IGetNextCE(coll, &s, status); 5366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(order == 0) { 5368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 5369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(order == UCOL_NO_MORE_CES) { 5372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru finished = TRUE; 5373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 
5374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru notIsContinuation = !isContinuation(order); 5377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(notIsContinuation) { 5379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary = (uint8_t)((order & tertiaryMask)); 5380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION)); 5382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 538327f654740f2a26ad62a5c155af9199af9e69b889claireho 5384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 5385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); 5386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru primary1 = (uint8_t)(order >> 8); 5387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 538827f654740f2a26ad62a5c155af9199af9e69b889claireho uint8_t originalPrimary1 = primary1; 538927f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->leadBytePermutationTable != NULL && notIsContinuation) { 539027f654740f2a26ad62a5c155af9199af9e69b889claireho primary1 = coll->leadBytePermutationTable[primary1]; 539127f654740f2a26ad62a5c155af9199af9e69b889claireho } 539227f654740f2a26ad62a5c155af9199af9e69b889claireho 5393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. 
*/ 539427f654740f2a26ad62a5c155af9199af9e69b889claireho /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */ 5395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */ 5396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* regular and simple sortkey calc */ 5397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(primary1 != UCOL_IGNORABLE) { 5398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 5399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(leadPrimary == primary1) { 5400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary2; 5401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(leadPrimary != 0) { 5403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = (uint8_t)((primary1 > leadPrimary) ? 
UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN); 5404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 == UCOL_IGNORABLE) { 5406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* one byter, not compressed */ 5407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary1; 5408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru leadPrimary = 0; 540927f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(isCompressible(coll, originalPrimary1)) { 541027f654740f2a26ad62a5c155af9199af9e69b889claireho /* compress */ 5411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = leadPrimary = primary1; 5412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary2; 541327f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 541427f654740f2a26ad62a5c155af9199af9e69b889claireho leadPrimary = 0; 541527f654740f2a26ad62a5c155af9199af9e69b889claireho *primaries++ = primary1; 541627f654740f2a26ad62a5c155af9199af9e69b889claireho *primaries++ = primary2; 5417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */ 5420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary1; 5421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 != UCOL_IGNORABLE) { 5422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primaries++ = primary2; /* second part */ 5423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
5427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(secondary > 0) { /* I think that != 0 test should be != IGNORABLE */ 5428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This is compression code. */ 5429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary == UCOL_COMMON2 && notIsContinuation) { 5430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ++count2; 5431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 5433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (secondary > UCOL_COMMON2) { // not necessary for 4th level. 5434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_TOP_COUNT2) { 5435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2); 5436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_TOP_COUNT2; 5437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - (count2-1)); 5439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 5441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); 5442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 5443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)); 5445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 = 0; 
5447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = secondary; 5449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(notIsContinuation) { 5453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary ^= caseSwitch; 5454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tertiary > 0) { 5457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* This is compression code. */ 5458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* sequence size check is included in the if clause */ 5459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (tertiary == tertiaryCommon && notIsContinuation) { 5460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ++count3; 5461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) { 5463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary += tertiaryAddition; 5464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) { 5465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary -= tertiaryAddition; 5466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count3 > 0) { 5468c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((tertiary > tertiaryCommon)) { 
5469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 > coll->tertiaryTopCount) { 5470c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount); 5471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryTopCount; 5472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryTop - (count3-1)); 5474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 > coll->tertiaryBottomCount) { 5476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount); 5477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryBottomCount; 5478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1)); 5480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 = 0; 5482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = tertiary; 5484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(primaries > primarySafeEnd) { /* We have stepped over the primary buffer */ 5488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == FALSE) { /* need to save our butts if we cannot reallocate */ 548950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
IInit_collIterate(coll, (UChar *)source, len, &s, status); 549050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 549150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 549250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho finished = TRUE; 549350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 549450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 549527f654740f2a26ad62a5c155af9199af9e69b889claireho s.flags &= ~UCOL_ITER_NORM; 5496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len); 5497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru finished = TRUE; 5499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* It's much nicer if we can actually reallocate */ 550150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sks = sortKeySize+(int32_t)((primaries - primStart)+(secondaries - secStart)+(tertiaries - terStart)); 5502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sks, status); 5503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primarySafeEnd = primStart + resultLength - 2; 5506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. 
*/ 5508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru finished = TRUE; 5510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(finished) { 5516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru prevBuffSize = minBufferSize; 5519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secStart = reallocateBuffer(&secondaries, secStart, second, &secSize, 2*secSize, status); 5520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terStart = reallocateBuffer(&tertiaries, terStart, tert, &terSize, 2*terSize, status); 5521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru minBufferSize *= 2; 5522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { // if we cannot reallocate buffers, we can at least give the sortkey size 5523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. 
*/ 5524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(*status)) { 553150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sortKeySize += (uint32_t)(primaries - primStart); 5532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we have done all the CE's, now let's put them together to form a key */ 5533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count2 > 0) { 5534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count2 > UCOL_BOT_COUNT2) { 5535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); 5536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count2 -= (uint32_t)UCOL_BOT_COUNT2; 5537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)); 5539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 554050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t secsize = (uint32_t)(secondaries-secStart); 5541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize += secsize; 5542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sortKeySize <= resultLength) { 5543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(primaries, secStart, secsize); 
5545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries += secsize; 5546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { 5548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status); 5549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, secStart, secsize); 5553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 5555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. 
*/ 5556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 5561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count3 > 0) { 5565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) { 5566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 >= coll->tertiaryTopCount) { 5567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount); 5568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryTopCount; 5569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryTop - count3); 5571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count3 > coll->tertiaryBottomCount) { 5573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount); 5574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count3 -= (uint32_t)coll->tertiaryBottomCount; 5575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1)); 
5577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 557950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t tersize = (uint32_t)(tertiaries - terStart); 5580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize += tersize; 5581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sortKeySize <= resultLength) { 5582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(primaries, terStart, tersize); 5584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primaries += tersize; 5585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { 5587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status); 5588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 5589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = primStart; 5590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = UCOL_LEVELTERMINATOR; 5591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(primaries, terStart, tersize); 5592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 5594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* We ran out of memory!? We can't recover. 
*/ 5595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; 5596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 559927f654740f2a26ad62a5c155af9199af9e69b889claireho *status = U_BUFFER_OVERFLOW_ERROR; 5600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(primaries++) = '\0'; 5604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(allocateSKBuffer == TRUE) { 5607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *result = (uint8_t*)uprv_malloc(sortKeySize); 5608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* test for NULL */ 5609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (*result == NULL) { 5610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 5611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 5612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(*result, primStart, sortKeySize); 5614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primStart != prim) { 5615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(primStart); 5616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Querucleanup: 5620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (allocateSKBuffer == FALSE && resultLength > 0 && U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) { 5621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* NULL terminate for safety */ 5622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru **result = 0; 5623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(terStart != tert) { 5625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(terStart); 5626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(secStart); 5627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* To avoid memory leak, free the offset buffer if necessary. */ 5630b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_freeOffsetBuffer(&s); 5631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 5632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return sortKeySize; 5633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 5634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline 5636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) { 5637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool notIsContinuation = !isContinuation(CE); 5638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF); 563927f654740f2a26ad62a5c155af9199af9e69b889claireho if((LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0) 564027f654740f2a26ad62a5c155af9199af9e69b889claireho || (!notIsContinuation && *wasShifted))) 
5641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (*wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */ 5642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 5643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The stuff below should probably be in the sortkey code... maybe not... 5644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary1 != 0) { /* if we were shifted and we got an ignorable code point */ 5645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we should just completely ignore it */ 5646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *wasShifted = TRUE; 5647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //continue; 5648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //*wasShifted = TRUE; 5650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 5651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *wasShifted = FALSE; 5653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 5654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 5656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline 5657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *dest) { 5658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(level < maxLevel) { 5659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = UCOL_LEVELTERMINATOR; 5660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = 0; 5662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
5663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 5664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** enumeration of level identifiers for partial sort key generation */ 5666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 5667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_PRIMARY = 0, 5668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_SECONDARY = 1, 5669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_CASE = 2, 5670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_TERTIARY = 3, 5671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_QUATERNARY = 4, 5672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_QUIN = 5, /** This is an extra level, not used - but we have three bits to blow */ 5673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_IDENTICAL = 6, 5674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_NULL = 7, /** level for the end of sort key. Will just produce zeros */ 5675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_LIMIT 5676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 5677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** collation state enum. *_SHIFT value is how much to shift right 5679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to get the state piece to the right. *_MASK value should be 5680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ANDed with the shifted state. This data is stored in state[1] 5681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * field. 
5682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 5684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_LEVEL_SHIFT = 0, /** level identificator. stores an enum value from above */ 5685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_LEVEL_MASK = 7, /** three bits */ 5686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3, /** number of bytes of primary or quaternary already written */ 5687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1, 5688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** can be only 0 or 1, since we get up to two bytes from primary or quaternary 5689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This field is also used to denote that the French secondary level is finished 5690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_WAS_SHIFTED_SHIFT = 4,/** was the last value shifted */ 5692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_WAS_SHIFTED_MASK = 1, /** can be 0 or 1 (Boolean) */ 5693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_USED_FRENCH_SHIFT = 5,/** how many French bytes have we already written */ 5694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_USED_FRENCH_MASK = 3, /** up to 4 bytes. See comment just below */ 5695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** When we do French we need to reverse secondary values. However, continuations 5696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * need to stay the same. 
So if you had abc1c2c3de, you need to have edc1c2c3ba 5697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_BOCSU_BYTES_SHIFT = 7, 5699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_BOCSU_BYTES_MASK = 3, 5700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_CONSUMED_CES_SHIFT = 9, 5701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF 5702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 5703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// macro calculating the number of expansion CEs available 5705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define uprv_numAvailableExpCEs(s) (s).CEpos - (s).toReturn 5706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** main sortkey part procedure. On the first call, 5709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * you should pass in a collator, an iterator, empty state 5710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * state[0] == state[1] == 0, a buffer to hold results 5711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of bytes you need and an error code pointer. 5712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make sure your buffer is big enough to hold the wanted 5713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of sortkey bytes. I don't check. 
5714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The only meaningful status you can get back is 5715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U_BUFFER_OVERFLOW_ERROR, which basically means that you 5716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have been dealt a raw deal and that you probably won't 5717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * be able to use partial sortkey generation for this 5718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * particular combination of string and collator. This 5719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is highly unlikely, but you should still check the error code. 5720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any other status means that you're not in a sane situation 5721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * anymore. After the first call, preserve state values and 5722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * use them on subsequent calls to obtain more bytes of a sortkey. 5723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Use until the number of bytes written is smaller than the requested 5724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of bytes. Generated sortkey is not compatible with the 5725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * one generated by ucol_getSortKey, as we don't do any compression. 5726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * However, levels are still terminated by a 1 (one) and the sortkey 5727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is terminated by a 0 (zero). Identical level is the same as in the 5728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * regular sortkey - internal bocu-1 implementation is used. 
 * For the curious, although you cannot do much about this, here is
 * the structure of state words.
 * state[0] - iterator state. Depends on the iterator implementation,
 *            but allows the iterator to continue where it stopped in
 *            the last iteration.
 * state[1] - collation processing state. Here is the distribution
 *            of the bits:
 *   0, 1, 2 - level of the sortkey - primary, secondary, case, tertiary
 *             quaternary, quin (we don't use this one), identical and
 *             null (producing only zeroes - first one to terminate the
 *             sortkey and subsequent to fill the buffer).
 *   3       - byte count or French done. Number of bytes of a primary or
 *             quaternary already written (0 or 1). On the secondary level
 *             this bit denotes that the French secondary level is finished.
 *   4       - was shifted. Whether the previous iteration finished in the
 *             shifted state.
 *   5, 6    - French continuation bytes written. See the comment in the enum
 *   7, 8    - Bocsu bytes used. Number of bytes from a bocu sequence on
 *             the identical level.
5746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 9..31 - CEs consumed. Number of getCE or next32 operations performed 5747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * since thes last successful update of the iterator state. 5748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 5750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_nextSortKeyPart(const UCollator *coll, 5751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharIterator *iter, 5752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t state[2], 5753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dest, int32_t count, 5754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) 5755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 5756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* error checking */ 5757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(status==NULL || U_FAILURE(*status)) { 5758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 5759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART); 5761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( coll==NULL || iter==NULL || 5762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state==NULL || 5763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count<0 || (count>0 && dest==NULL) 5764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 5765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status=U_ILLEGAL_ARGUMENT_ERROR; 5766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_STATUS(status); 5767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 
5768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d", 5771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll, iter, state[0], state[1], dest, count); 5772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count==0) { 5774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* nothing to do */ 5775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_VALUE(0); 5776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 5777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** Setting up situation according to the state we got from the previous iteration */ 5779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The state of the iterator from the previous invocation 5780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t iterState = state[0]; 5781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Has the last iteration ended in the shifted state 5782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_SHIFTED_MASK)?TRUE:FALSE; 5783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // What is the current level of the sortkey? 5784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK; 5785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Have we written only one byte from a two byte primary in the previous iteration? 
5786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Also on secondary level - have we finished with the French secondary? 5787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK; 5788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // number of bytes in the continuation buffer for French 5789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK; 5790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Number of bytes already written from a bocsu sequence. Since 5791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the longes bocsu sequence is 4 long, this can be up to 3. 5792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK_BOCSU_BYTES_MASK; 5793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Number of elements that need to be consumed in this iteration because 5794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the iterator returned UITER_NO_STATE at the end of the last iteration, 5795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so we had to save the last valid state. 5796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED_CES_MASK; 5797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** values that depend on the collator attributes */ 5799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // strength of the collator. 
5800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status); 5801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // maximal level of the partial sortkey. Need to take whether case level is done 5802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxLevel = 0; 5803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strength < UCOL_TERTIARY) { 5804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) { 5805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = UCOL_PSK_CASE; 5806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = strength; 5808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strength == UCOL_TERTIARY) { 5811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = UCOL_PSK_TERTIARY; 5812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(strength == UCOL_QUATERNARY) { 5813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = UCOL_PSK_QUATERNARY; 5814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // identical 5815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxLevel = UCOL_IDENTICAL; 5816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // value for the quaternary level if Hiragana is encountered. 
Used for JIS X 4061 collation 5819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t UCOL_HIRAGANA_QUAD = 5820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF; 5821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Boundary value that decides whether a CE is shifted or not 5822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0; 5823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Are we doing French collation? 5824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON); 5825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** initializing the collation state */ 5827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool notIsContinuation = FALSE; 5828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t CE = UCOL_NO_MORE_CES; 5829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate s; 583150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, NULL, -1, &s, status); 583250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 583350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTRACE_EXIT_STATUS(*status); 583450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 583550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.iterator = iter; 5837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.flags |= UCOL_USE_ITERATOR; 5838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This variable tells us whether we have produced some other levels in 
this iteration 5839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // before we moved to the identical level. In that case, we need to switch the 5840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // type of the iterator. 5841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doingIdenticalFromStart = FALSE; 5842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Normalizing iterator 5843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The division for the array length may truncate the array size to 5844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high 5845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for all platforms anyway. 5846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 5847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormIterator *normIter = NULL; 5848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the normalization is turned on for the collator and we are below identical level 5849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we will use a FCD normalizing iterator 5850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && level < UCOL_PSK_IDENTICAL) { 5851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status); 5852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status); 5853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.flags &= ~UCOL_ITER_NORM; 5854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 5855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 
5856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(level == UCOL_PSK_IDENTICAL) { 5859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // for identical level, we need a NFD iterator. We need to instantiate it here, since we 5860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // will be updating the state - and this cannot be done on an ordinary iterator. 5861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status); 5862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); 5863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.flags &= ~UCOL_ITER_NORM; 5864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 5865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 5866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru doingIdenticalFromStart = TRUE; 5869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is the tentative new state of the iterator. The problem 5872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // is that the iterator might return an undefined state, in 5873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which case we should save the last valid state and increase 5874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the iterator skip value. 
5875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newState = 0; 5876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First, we set the iterator to the last valid position 5878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // from the last iteration. This was saved in state[0]. 5879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(iterState == 0) { 5880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* initial state */ 5881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone) { 5882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_LIMIT); 5883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 5885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset to previous state */ 5888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->setState(s.iterator, iterState, status); 5889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 5890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 5891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This 
variable tells us whether we can attempt to update the state 5898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // of iterator. Situations where we don't want to update iterator state 5899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are the existence of expansion CEs that are not yet processed, and 5900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // finishing the case level without enough space in the buffer to insert 5901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a level terminator. 5902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool canUpdateState = TRUE; 5903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Consume all the CEs that were consumed at the end of the previous 5905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // iteration without updating the iterator state. On identical level, 5906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // consume the code points. 5907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t counter = cces; 5908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level < UCOL_PSK_IDENTICAL) { 5909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(counter-->0) { 5910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If we're doing French and we are on the secondary level, 5911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we go backwards. 
5912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(level == UCOL_PSK_SECONDARY && doingFrench) { 5913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetPrevCE(coll, &s, status); 5914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 5916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* should not happen */ 5919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status=U_INTERNAL_PROGRAM_ERROR; 5920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 5921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 5922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 5924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 5925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(counter-->0) { 5929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uiter_next32(s.iterator); 5930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // French secondary needs to know whether the iterator state of zero came from previous level OR 5934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // from a new invocation... 
5935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool wasDoingPrimary = FALSE; 5936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // destination buffer byte counter. When this guy 5937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // gets to count, we're done with the iteration 5938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0; 5939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // used to count the zero bytes written after we 5940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // have finished with the sort key 5941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t j = 0; 5942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Hm.... I think we're ready to plunge in. Basic story is as following: 5945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we have a fall through case based on level. This is used for initial 5946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // positioning on iteration start. Every level processor contains a 5947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for(;;) which will be broken when we exhaust all the CEs. Other 5948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // way to exit is a goto saveState, which happens when we have filled 5949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // out our buffer. 
5950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(level) { 5951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_PRIMARY: 5952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru wasDoingPrimary = TRUE; 5953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 5954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i==count) { 5955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 5956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We should save the state only if we 5958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are sure that we are done with the 5959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // previous iterator state 5960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState && byteCountOrFrenchDone == 0) { 5961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 5962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 5963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 5964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 5965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 5968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cces++; 5969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 5970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Add the level separator 5971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 5972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
byteCountOrFrenchDone=0; 5973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Restart the iteration an move to the 5974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // second level 5975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 5976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cces = 0; 5977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_SECONDARY; 5978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 598027f654740f2a26ad62a5c155af9199af9e69b889claireho if(!isContinuation(CE)){ 598127f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->leadBytePermutationTable != NULL){ 598227f654740f2a26ad62a5c155af9199af9e69b889claireho CE = (coll->leadBytePermutationTable[CE>>24] << 24) | (CE & 0x00FFFFFF); 598327f654740f2a26ad62a5c155af9199af9e69b889claireho } 598427f654740f2a26ad62a5c155af9199af9e69b889claireho } 5985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isShiftedCE(CE, LVT, &wasShifted)) { 5986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */ 5987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 5988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(byteCountOrFrenchDone == 0) { 5989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // get the second byte of primary 5990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)(CE >> 8); 5991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 5993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((CE &=0xff)!=0) { 5995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(i==count) { 5996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* overflow */ 5997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 1; 5998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces--; 5999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 6000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 6002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 6006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 6007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 6009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* fall through to next level */ 6012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_PSK_SECONDARY: 6013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_SECONDARY) { 6014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!doingFrench) { 6015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 6016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 6017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 6018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We should save the state only if we 6020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
// are sure that we are done with the 6021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state 6022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 6023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 6024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 6025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 6026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 6030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 6031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 6032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 6033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 6034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 6035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration an move to the 6036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // second level 6037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 6038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_CASE; 6040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isShiftedCE(CE, LVT, &wasShifted)) { 
6043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= 8; /* get secondary */ 6044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 6045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 6046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 6049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 6050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 6052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // French secondary processing 6055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t frenchBuff[UCOL_MAX_BUFFER]; 6056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t frenchIndex = 0; 6057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Here we are going backwards. 6058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If the iterator is at the beggining, it should be 6059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // moved to end. 
6060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(wasDoingPrimary) { 6061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_LIMIT); 6062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 6065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 6066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 6067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 6069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 6070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 6071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 6072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetPrevCE(coll, &s, status); 6076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 6077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 6078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 6079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 6080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 6081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration an move to the next level 6082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, 
UITER_START); 6083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_CASE; 6084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(CE)) { // if it's a continuation, we want to save it and 6087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // reverse when we get a first non-continuation CE. 6088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= 8; 6089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchBuff[frenchIndex++] = (uint8_t)CE; 6090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(!isShiftedCE(CE, LVT, &wasShifted)) { 6091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= 8; /* get secondary */ 6092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!frenchIndex) { 6093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 6094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 6095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchBuff[frenchIndex++] = (uint8_t)CE; 6098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru frenchIndex -= usedFrench; 6099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru usedFrench = 0; 6100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i < count && frenchIndex) { 6101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = frenchBuff[--frenchIndex]; 6102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru usedFrench++; 6103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
6105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 6107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 6108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 6110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_CASE; 6115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 6117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_CASE: 6118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) { 6119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t caseShift = UCOL_CASE_SHIFT_START; 6120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t caseByte = UCOL_CASE_BYTE_START; 6121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t caseBits = 0; 6122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 612450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(caseShift <= UCOL_CASE_SHIFT_START); 6125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 6126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 6127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We should save the 
state only if we 6129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are sure that we are done with the 6130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state 6131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 6132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 6133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 6134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 6135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 6139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 6140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 6141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // On the case level we might have an unfinished 6142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // case byte. Add one if it's started. 6143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(caseShift != UCOL_CASE_SHIFT_START) { 6144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = caseByte; 6145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We have finished processing CEs on this level. 6148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // However, we don't know if we have enough space 6149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // to add a case level terminator. 
6150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i < count) { 6151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 6152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 6153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration and move to the 6154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // next level 6155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 6156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_TERTIARY; 6157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 6159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isShiftedCE(CE, LVT, &wasShifted)) { 6164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 || strength > UCOL_PRIMARY)) { 6165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // do the case level if we need to do it. 
We don't want to calculate 6166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // case level for primary ignorables if we have only primary strength and case level 6167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise we would break well formedness of CEs 6168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK); 6169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseBits = (uint8_t)(CE & 0xC0); 6170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // this copies the case level logic from the 6171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // sort key generation code 6172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 617350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (caseShift == 0) { 617450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i++] = caseByte; 617550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho caseShift = UCOL_CASE_SHIFT_START; 617650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho caseByte = UCOL_CASE_BYTE_START; 617750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 6178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseFirst == UCOL_UPPER_FIRST) { 6179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((caseBits & 0xC0) == 0) { 6180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= 1 << (--caseShift); 6181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= 0 << (--caseShift); 6183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* second bit */ 6184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(caseShift == 0) { 6185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = caseByte; 6186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift = UCOL_CASE_SHIFT_START; 
6187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte = UCOL_CASE_BYTE_START; 6188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= ((caseBits>>6)&1) << (--caseShift); 6190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((caseBits & 0xC0) == 0) { 6193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= 0 << (--caseShift); 6194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= 1 << (--caseShift); 6196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* second bit */ 6197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(caseShift == 0) { 6198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = caseByte; 6199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseShift = UCOL_CASE_SHIFT_START; 6200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte = UCOL_CASE_BYTE_START; 6201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru caseByte |= ((caseBits>>7)&1) << (--caseShift); 6203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Not sure this is correct for the case level - revisit 6210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(uprv_numAvailableExpCEs(s)) { 6211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 6212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 6214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_TERTIARY; 6218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 6220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_TERTIARY: 6221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_TERTIARY) { 6222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 6223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 6224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 6225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We should save the state only if we 6227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are sure that we are done with the 6228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state 6229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 6230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 6231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 6232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 6233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 
6234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 6237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 6238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 6239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 6240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 6241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 6242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration an move to the 6243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // second level 6244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 6245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_QUATERNARY; 6247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isShiftedCE(CE, LVT, &wasShifted)) { 6250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notIsContinuation = !isContinuation(CE); 6251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 6253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK); 6254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE ^= coll->caseSwitch; 6255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE &= coll->tertiaryMask; 6256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
else { 6257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION)); 6258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 6261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 6262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 6265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 6266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 6268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if we're not doing tertiary 6272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // skip to the end 6273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_NULL; 6274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 6276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_QUATERNARY: 6277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_QUATERNARY) { 6278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 6279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 6280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 
6281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We should save the state only if we 6283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are sure that we are done with the 6284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state 6285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(canUpdateState) { 6286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 6287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 6288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 6289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = ucol_IGetNextCE(coll, &s, status); 6293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 6294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==UCOL_NO_MORE_CES) { 6295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Add the level separator 6296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 6297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //dest[i++] = UCOL_LEVELTERMINATOR; 6298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 6299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restart the iteration an move to the 6300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // second level 6301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator->move(s.iterator, 0, UITER_START); 6302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 
6303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_QUIN; 6304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE==0) 6307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 6308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isShiftedCE(CE, LVT, &wasShifted)) { 6309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE >>= 16; /* get primary */ 6310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE != 0) { 6311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(byteCountOrFrenchDone == 0) { 6312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)(CE >> 8); 6313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 0; 6315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((CE &=0xff)!=0) { 6317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i==count) { 6318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* overflow */ 6319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru byteCountOrFrenchDone = 1; 6320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 6321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++]=(uint8_t)CE; 6323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notIsContinuation = !isContinuation(CE); 
6327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(notIsContinuation) { 6328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it 6329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = UCOL_HIRAGANA_QUAD; 6330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = 0xFF; 6332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(uprv_numAvailableExpCEs(s)) { 6336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = FALSE; 6337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru canUpdateState = TRUE; 6339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if we're not doing quaternary 6343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // skip to the end 6344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_NULL; 6345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 6347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_QUIN: 6348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_IDENTICAL; 6349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 6350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
case UCOL_PSK_IDENTICAL: 6351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_IDENTICAL) { 6352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 first, second; 6353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t bocsuBytesWritten = 0; 6354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We always need to do identical on 6355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the NFD form of the string. 6356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(normIter == NULL) { 6357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we arrived from the level below and 6358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // normalization was not turned on. 6359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // therefore, we need to make a fresh NFD iterator 6360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status); 6361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); 6362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(!doingIdenticalFromStart) { 6363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // there is an iterator, but we did some other levels. 6364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // therefore, we have a FCD iterator - need to make 6365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a NFD one. 
6366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // normIter being at the beginning does not guarantee 6367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that the underlying iterator is at the beginning 6368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iter->move(iter, 0, UITER_START); 6369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); 6370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // At this point we have a NFD iterator that is positioned 6372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // in the right place 6373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status)) { 6374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 6375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru first = uiter_previous32(s.iterator); 6378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // maybe we're at the start of the string 6379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(first == U_SENTINEL) { 6380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru first = 0; 6381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uiter_next32(s.iterator); 6383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru j = 0; 6386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 6387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(i == count) { 
6388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(j+1 < bocsuBytesWritten) { 6389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bocsuBytesUsed = j+1; 6390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto saveState; 6392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // On identical level, we will always save 6395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the state if we reach this point, since 6396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we don't depend on getNextCE for content 6397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // all the content is in our buffer and we 6398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // already either stored the full buffer OR 6399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise we won't arrive here. 
6400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru newState = s.iterator->getState(s.iterator); 6401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newState != UITER_NO_STATE) { 6402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iterState = newState; 6403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces = 0; 6404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t buff[4]; 6407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru second = uiter_next32(s.iterator); 6408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cces++; 6409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // end condition for identical level 6411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(second == U_SENTINEL) { 6412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru terminatePSKLevel(level, maxLevel, i, dest); 6413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_NULL; 6414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, second, buff); 6417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru first = second; 6418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru j = 0; 6420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(bocsuBytesUsed != 0) { 6421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(bocsuBytesUsed-->0) { 6422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru j++; 6423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru } 6424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i < count && j < bocsuBytesWritten) { 6427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[i++] = buff[j++]; 6428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru level = UCOL_PSK_NULL; 6433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to next level */ 6435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_PSK_NULL: 6436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru j = i; 6437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(j<count) { 6438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest[j++]=0; 6439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 6442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INTERNAL_PROGRAM_ERROR; 6443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_STATUS(*status); 6444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusaveState: 6448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now we need to return stuff. 
First we want to see whether we have 6449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // done everything for the current state of iterator. 6450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(byteCountOrFrenchDone 6451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || canUpdateState == FALSE 6452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru || (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE) 6453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Any of above mean that the previous transaction 6455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // wasn't finished and that we should store the 6456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // previous iterator state. 6457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru state[0] = iterState; 6458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The transaction is complete. We will continue in the next iteration. 6460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[0] = s.iterator->getState(s.iterator); 6461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cces = 0; 6462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Store the number of bocsu bytes written. 
6464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) { 6465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 6466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BYTES_SHIFT; 6468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Next we put in the level of comparison 6470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT); 6471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we are doing French, we need to store whether we have just finished the French level 6473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level == UCOL_PSK_SECONDARY && doingFrench) { 6474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru state[1] |= (((state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT); 6475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru state[1] |= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT); 6477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Was the latest CE shifted 6480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(wasShifted) { 6481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru state[1] |= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT; 
6482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check for cces overflow 6484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) { 6485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 6486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Store cces 6488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[1] |= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SHIFT); 6489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check for French overflow 6491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) { 6492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 6493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Store number of bytes written in the French secondary continuation sequence 6495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state[1] |= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENCH_SHIFT); 6496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we have used normalizing iterator, get rid of it 6499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(normIter != NULL) { 6500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(normIter); 6501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
6503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* To avoid memory leak, free the offset buffer if necessary. */ 6504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_freeOffsetBuffer(&s); 6505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Return number of meaningful sortkey bytes. 6507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d", 6508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest,i, state[0], state[1]); 6509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_VALUE(i); 6510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return i; 6511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 6514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Produce a bound for a given sortkey and a number of levels. 
6515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 6516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 6517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getBound(const uint8_t *source, 6518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 6519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColBoundMode boundType, 6520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t noOfLevels, 6521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *result, 6522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t resultLength, 6523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) 6524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 6525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // consistency checks 6526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(status == NULL || U_FAILURE(*status)) { 6527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(source == NULL) { 6530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t sourceIndex = 0; 6535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Scan the string until we skip enough of the key OR reach the end of the key 6536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 6537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sourceIndex++; 6538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(source[sourceIndex] == UCOL_LEVELTERMINATOR) { 6539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru noOfLevels--; 6540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while (noOfLevels > 0 6542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru && (source[sourceIndex] != 0 || sourceIndex < sourceLength)); 6543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((source[sourceIndex] == 0 || sourceIndex == sourceLength) 6545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru && noOfLevels > 0) { 6546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_SORT_KEY_TOO_SHORT_WARNING; 6547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // READ ME: this code assumes that the values for boundType 6551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // enum will not changes. They are set so that the enum value 6552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // corresponds to the number of extra bytes each bound type 6553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // needs. 6554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(result != NULL && resultLength >= sourceIndex+boundType) { 6555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(result, source, sourceIndex); 6556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch(boundType) { 6557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Lower bound just gets terminated. 
No extra bytes 6558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_BOUND_LOWER: // = 0 6559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Upper bound needs one extra byte 6561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_BOUND_UPPER: // = 1 6562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result[sourceIndex++] = 2; 6563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Upper long bound needs two extra bytes 6565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_BOUND_UPPER_LONG: // = 2 6566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result[sourceIndex++] = 0xFF; 6567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result[sourceIndex++] = 0xFF; 6568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: 6570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result[sourceIndex++] = 0; 6574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return sourceIndex; 6576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return sourceIndex+boundType+1; 6578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru/****************************************************************************/ 6582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the functions that deal with the properties of a collator */ 6583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there are new APIs and some compatibility APIs */ 6584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 6585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void 6587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE, 6588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t *primShift, int32_t *secShift, int32_t *terShift) 6589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 6590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0; 6591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool reverseSecondary = FALSE; 659227f654740f2a26ad62a5c155af9199af9e69b889claireho UBool continuation = isContinuation(CE); 659327f654740f2a26ad62a5c155af9199af9e69b889claireho if(!continuation) { 6594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary = (uint8_t)((CE & coll->tertiaryMask)); 6595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary ^= coll->caseSwitch; 6596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru reverseSecondary = TRUE; 6597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION)); 6599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tertiary &= UCOL_REMOVE_CASE; 6600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru reverseSecondary = 
FALSE; 6601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secondary = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK); 6604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primary2 = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK); 6605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primary1 = (uint8_t)(CE >> 8); 6606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary1 != 0) { 660827f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->leadBytePermutationTable != NULL && !continuation) { 660927f654740f2a26ad62a5c155af9199af9e69b889claireho primary1 = coll->leadBytePermutationTable[primary1]; 661027f654740f2a26ad62a5c155af9199af9e69b889claireho } 661127f654740f2a26ad62a5c155af9199af9e69b889claireho 6612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] |= (primary1 << *primShift); 6613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primShift -= 8; 6614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primary2 != 0) { 6616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(*primShift < 0) { 6617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE; 6618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; 6619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; 6620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 6621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] |= (primary2 << 
*primShift); 6623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *primShift -= 8; 6624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secondary != 0) { 6626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse secondary 6627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space for secondary 6628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << 24); 6629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // normal case 6630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << *secShift); 6631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *secShift -= 8; 6633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tertiary != 0) { 6635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+ch] |= (tertiary << *terShift); 6636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *terShift -= 8; 6637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool 6641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_resizeLatinOneTable(UCollator *coll, int32_t size, UErrorCode *status) { 6642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *newTable = (uint32_t *)uprv_malloc(size*sizeof(uint32_t)*3); 
6643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(newTable == NULL) { 6644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 6645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->latinOneFailed = TRUE; 6646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 6647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sizeToCopy = ((size<coll->latinOneTableLen)?size:coll->latinOneTableLen)*sizeof(uint32_t); 6649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(newTable, 0, size*sizeof(uint32_t)*3); 6650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(newTable, coll->latinOneCEs, sizeToCopy); 6651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(newTable+size, coll->latinOneCEs+coll->latinOneTableLen, sizeToCopy); 6652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(newTable+2*size, coll->latinOneCEs+2*coll->latinOneTableLen, sizeToCopy); 6653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->latinOneTableLen = size; 6654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(coll->latinOneCEs); 6655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->latinOneCEs = newTable; 6656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 6657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 6660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setUpLatinOne(UCollator *coll, UErrorCode *status) { 6661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool result = TRUE; 6662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->latinOneCEs == NULL) { 
6663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*UCOL_LATINONETABLELEN*3); 6664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->latinOneCEs == NULL) { 6665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 6666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 6667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneTableLen = UCOL_LATINONETABLELEN; 6669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar ch = 0; 6671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationElements *it = ucol_openElements(coll, &ch, 1, status); 6672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for null pointer 6673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(*status)) { 6674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 6675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)*coll->latinOneTableLen*3); 6677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t primShift = 24, secShift = 24, terShift = 24; 6679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t CE = 0; 6680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1; 6681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: make safe if you get more than you wanted... 
6683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) { 6684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primShift = 24; secShift = 24; terShift = 24; 6685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ch < 0x100) { 6686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = coll->latinOneMapping[ch]; 6687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 6689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE == UCOL_NOT_FOUND && coll->UCA) { 6690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); 6691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE < UCOL_NOT_FOUND) { 6694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift); 6695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch (getCETag(CE)) { 6697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case EXPANSION_TAG: 6698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case DIGIT_TAG: 6699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setText(it, &ch, 1, status); 6700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) { 6701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(primShift < 0 || secShift < 0 || terShift < 0) { 6702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE; 6703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; 6704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; 6705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift); 6708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case CONTRACTION_TAG: 6711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // here is the trick 6712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // F2 is contraction. We do something very similar to contractions 6713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // but have two indices, one in the real contraction table and the 6714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // other to where we stuffed things. This hopes that we don't have 6715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // many contractions (this should work for latin-1 tables). 
6716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((CE & 0x00FFF000) != 0) { 6718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_UNSUPPORTED_ERROR; 6719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE); 6723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table 6725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[ch] = CE; 6727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+ch] = CE; 6728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE; 6729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We're going to jump into contraction table, pick the elements 6731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // and use them 6732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 6733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CE = *(coll->contractionCEs + 6734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (UCharOffset - coll->contractionIndex)); 6735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) { 6736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t size; 
6737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t i; /* general counter */ 6738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */ 6739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru size = getExpansionCount(CE); 6740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //CE = *CEOffset++; 6741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */ 6742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 0; i<size; i++) { 6743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primShift < 0 || secShift < 0 || terShift < 0) { 6744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift); 6750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* else, we do */ 6752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(*CEOffset != 0) { 6753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(primShift < 0 || secShift < 0 || terShift < 0) { 6754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift); 6760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru contractionOffset++; 6763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(CE < UCOL_NOT_FOUND) { 6764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, (UChar)contractionOffset++, CE, &primShift, &secShift, &terShift); 6765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; 6769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru contractionOffset++; 6770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharOffset++; 6772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru primShift = 24; secShift = 24; terShift = 24; 
6773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate 6774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) { 6775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while(*UCharOffset != 0xFFFF); 6779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break;; 6781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case SPEC_PROC_TAG: 6782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 0xB7 is a precontext character defined in UCA5.1, a special 6784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // handle is implemeted in order to save LatinOne table for 6785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // most locales. 
6786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ch==0xb7) { 6787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift); 6788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 6790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: 6795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // compact table 6800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(contractionOffset < coll->latinOneTableLen) { 6801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) { 6802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup_after_failure; 6803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(it); 6806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 6807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querucleanup_after_failure: 6809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // status should already be set before arriving here. 
6810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneFailed = TRUE; 6811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(it); 6812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 6813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid ucol_updateInternalState(UCollator *coll, UErrorCode *status) { 6816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 6817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseFirst == UCOL_UPPER_FIRST) { 6818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->caseSwitch = UCOL_CASE_SWITCH; 6819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->caseSwitch = UCOL_NO_CASE_SWITCH; 6821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseLevel == UCOL_ON || coll->caseFirst == UCOL_OFF) { 6824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryMask = UCOL_REMOVE_CASE; 6825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryCommon = UCOL_COMMON3_NORMAL; 6826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF; /* Should be 0x80 */ 6827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF; 6828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryBottom = UCOL_COMMON_BOT3; 6829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryMask = 
UCOL_KEEP_CASE; 6831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON; 6832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseFirst == UCOL_UPPER_FIRST) { 6833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST; 6834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER; 6835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER; 6836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryCommon = UCOL_COMMON3_NORMAL; 6838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER; 6839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER; 6840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Set the compression values */ 6844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - UCOL_COMMON_BOT3-1); 6845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*tertiaryTotal); /* we multilply double with int, but need only int */ 6846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopCount); 6847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY 
6849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == UCOL_NON_IGNORABLE) 6850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->sortKeyGen = ucol_calcSortKeySimpleTertiary; 6852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->sortKeyGen = ucol_calcSortKey; 6854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && coll->numericCollation == UCOL_OFF 6856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed) 6857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 6858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) { 6859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it 6860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "F"); 6861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneUse = TRUE; 6862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneUse = FALSE; 6864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(*status == U_UNSUPPORTED_ERROR) { 6866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ZERO_ERROR; 6867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // latin1Table exists and it 
doesn't need to be regenerated, just use it 6869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneUse = TRUE; 6870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneUse = FALSE; 6873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 6878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) { 6879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(len == -1) { 6883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru len = u_strlen(varTop); 6884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(len == 0) { 6886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate s; 689150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, varTop, len, &s, status); 689250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 
689350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 689450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 6895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t CE = ucol_IGetNextCE(coll, &s, status); 6897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* here we check if we have consumed all characters */ 6899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* you can put in either one character or a contraction */ 6900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* you shouldn't put more... */ 6901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(s.pos != s.endp || CE == UCOL_NO_MORE_CES) { 6902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_CE_NOT_FOUND_ERROR; 6903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t nextCE = ucol_IGetNextCE(coll, &s, status); 6907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) { 6909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_PRIMARY_TOO_LONG_ERROR; 6910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) { 6913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->variableTopValueisDefault = FALSE; 6914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->variableTopValue = (CE & 
UCOL_PRIMARYMASK)>>16; 6915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 6917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* To avoid memory leak, free the offset buffer if necessary. */ 6918b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_freeOffsetBuffer(&s); 6919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return CE & UCOL_PRIMARYMASK; 6921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) { 6924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 6926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return coll->variableTopValue<<16; 6928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 6931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) { 6932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 6934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) { 
6937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->variableTopValueisDefault = FALSE; 6938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16; 6939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Attribute setter API */ 6942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 6943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) { 6944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 6945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 6946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColAttributeValue oldFrench = coll->frenchCollation; 6948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColAttributeValue oldCaseFirst = coll->caseFirst; 6949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(attr) { 6950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_NUMERIC_COLLATION: /* sort substrings of digits as numbers */ 6951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(value == UCOL_ON) { 6952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollation = UCOL_ON; 6953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollationisDefault = FALSE; 6954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollation = UCOL_OFF; 6956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollationisDefault = FALSE; 
6957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollationisDefault = TRUE; 6959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->numericCollation = (UColAttributeValue)coll->options->numericCollation; 6960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_HIRAGANA_QUATERNARY_MODE: /* special quaternary values for Hiragana */ 6965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(value == UCOL_ON) { 6966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQ = UCOL_ON; 6967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQisDefault = FALSE; 6968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQ = UCOL_OFF; 6970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQisDefault = FALSE; 6971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQisDefault = TRUE; 6973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->hiraganaQ = (UColAttributeValue)coll->options->hiraganaQ; 6974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 6975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 6976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 
6978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/ 6979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_ON) { 6980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollation = UCOL_ON; 6981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollationisDefault = FALSE; 6982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_OFF) { 6983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollation = UCOL_OFF; 6984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollationisDefault = FALSE; 6985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 6986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollationisDefault = TRUE; 6987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->frenchCollation = (UColAttributeValue)coll->options->frenchCollation; 6988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 6989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 6990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ 6993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_SHIFTED) { 6994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandling = UCOL_SHIFTED; 6995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandlingisDefault = FALSE; 6996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_NON_IGNORABLE) { 6997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
coll->alternateHandling = UCOL_NON_IGNORABLE; 6998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandlingisDefault = FALSE; 6999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 7000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandlingisDefault = TRUE; 7001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->alternateHandling = (UColAttributeValue)coll->options->alternateHandling ; 7002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 7004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ 7007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_LOWER_FIRST) { 7008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirst = UCOL_LOWER_FIRST; 7009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirstisDefault = FALSE; 7010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_UPPER_FIRST) { 7011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirst = UCOL_UPPER_FIRST; 7012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirstisDefault = FALSE; 7013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_OFF) { 7014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->caseFirst = UCOL_OFF; 7015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->caseFirstisDefault = FALSE; 7016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 7017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
coll->caseFirst = (UColAttributeValue)coll->options->caseFirst; 7018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseFirstisDefault = TRUE; 7019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 7021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_CASE_LEVEL: /* do we have an extra case level */ 7024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == UCOL_ON) { 7025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevel = UCOL_ON; 7026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevelisDefault = FALSE; 7027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_OFF) { 7028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevel = UCOL_OFF; 7029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevelisDefault = FALSE; 7030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 7031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevel = (UColAttributeValue)coll->options->caseLevel; 7032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->caseLevelisDefault = TRUE; 7033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 7035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ 7038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value == 
UCOL_ON) { 7039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationMode = UCOL_ON; 7040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationModeisDefault = FALSE; 704127f654740f2a26ad62a5c155af9199af9e69b889claireho initializeFCD(status); 7042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_OFF) { 7043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationMode = UCOL_OFF; 7044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationModeisDefault = FALSE; 7045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value == UCOL_DEFAULT) { 7046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationModeisDefault = TRUE; 7047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->normalizationMode = (UColAttributeValue)coll->options->normalizationMode; 704827f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->normalizationMode == UCOL_ON) { 704927f654740f2a26ad62a5c155af9199af9e69b889claireho initializeFCD(status); 705027f654740f2a26ad62a5c155af9199af9e69b889claireho } 7051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 7053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_STRENGTH: /* attribute for strength */ 7056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (value == UCOL_DEFAULT) { 7057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->strengthisDefault = TRUE; 7058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->strength = (UColAttributeValue)coll->options->strength; 7059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (value <= 
UCOL_IDENTICAL) { 7060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->strengthisDefault = FALSE; 7061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coll->strength = value; 7062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR ; 7064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_ATTRIBUTE_COUNT: 7067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 7068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 7069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) { 7072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneRegenTable = TRUE; 7073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru coll->latinOneRegenTable = FALSE; 7075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_updateInternalState(coll, status); 7077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UColAttributeValue U_EXPORT2 7080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) { 7081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL) { 
7082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_DEFAULT; 7083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(attr) { 7085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_NUMERIC_COLLATION: 7086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->numericCollation; 7087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_HIRAGANA_QUATERNARY_MODE: 7088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->hiraganaQ; 7089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/ 7090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->frenchCollation; 7091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ 7092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->alternateHandling; 7093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ 7094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->caseFirst; 7095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_CASE_LEVEL: /* do we have an extra case level */ 7096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->caseLevel; 7097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ 7098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->normalizationMode; 7099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCOL_STRENGTH: /* attribute for strength */ 7100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return coll->strength; 7101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
case UCOL_ATTRIBUTE_COUNT: 7102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 7103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 7104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_DEFAULT; 7107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 7110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setStrength( UCollator *coll, 7111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationStrength strength) 7112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 7113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 7114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); 7115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationStrength U_EXPORT2 7118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getStrength(const UCollator *coll) 7119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 7120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 7121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return ucol_getAttribute(coll, UCOL_STRENGTH, &status); 7122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 712427f654740f2a26ad62a5c155af9199af9e69b889clairehoU_INTERNAL int32_t U_EXPORT2 
712527f654740f2a26ad62a5c155af9199af9e69b889clairehoucol_getReorderCodes(const UCollator *coll, 712627f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t *dest, 712727f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t destCapacity, 712827f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *pErrorCode) { 712927f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(*pErrorCode)) { 713027f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 713127f654740f2a26ad62a5c155af9199af9e69b889claireho } 713227f654740f2a26ad62a5c155af9199af9e69b889claireho 713327f654740f2a26ad62a5c155af9199af9e69b889claireho if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { 713427f654740f2a26ad62a5c155af9199af9e69b889claireho *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 713527f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 713627f654740f2a26ad62a5c155af9199af9e69b889claireho } 713727f654740f2a26ad62a5c155af9199af9e69b889claireho 713827f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->reorderCodesLength > destCapacity) { 713927f654740f2a26ad62a5c155af9199af9e69b889claireho *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 714027f654740f2a26ad62a5c155af9199af9e69b889claireho return coll->reorderCodesLength; 714127f654740f2a26ad62a5c155af9199af9e69b889claireho } 714227f654740f2a26ad62a5c155af9199af9e69b889claireho for (int32_t i = 0; i < coll->reorderCodesLength; i++) { 714327f654740f2a26ad62a5c155af9199af9e69b889claireho dest[i] = coll->reorderCodes[i]; 714427f654740f2a26ad62a5c155af9199af9e69b889claireho } 714527f654740f2a26ad62a5c155af9199af9e69b889claireho return coll->reorderCodesLength; 714627f654740f2a26ad62a5c155af9199af9e69b889claireho} 714727f654740f2a26ad62a5c155af9199af9e69b889claireho 714827f654740f2a26ad62a5c155af9199af9e69b889clairehoU_INTERNAL void U_EXPORT2 714927f654740f2a26ad62a5c155af9199af9e69b889clairehoucol_setReorderCodes(UCollator *coll, 715027f654740f2a26ad62a5c155af9199af9e69b889claireho const int32_t *reorderCodes, 
715127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t reorderCodesLength, 715227f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *pErrorCode) { 715327f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(*pErrorCode)) { 715427f654740f2a26ad62a5c155af9199af9e69b889claireho return; 715527f654740f2a26ad62a5c155af9199af9e69b889claireho } 715627f654740f2a26ad62a5c155af9199af9e69b889claireho 715727f654740f2a26ad62a5c155af9199af9e69b889claireho if (reorderCodesLength < 0 || (reorderCodesLength > 0 && reorderCodes == NULL)) { 715827f654740f2a26ad62a5c155af9199af9e69b889claireho *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 715927f654740f2a26ad62a5c155af9199af9e69b889claireho return; 716027f654740f2a26ad62a5c155af9199af9e69b889claireho } 716127f654740f2a26ad62a5c155af9199af9e69b889claireho 716227f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(coll->reorderCodes); 716327f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodes = NULL; 716427f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodesLength = 0; 716527f654740f2a26ad62a5c155af9199af9e69b889claireho if (reorderCodesLength == 0) { 716627f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(coll->leadBytePermutationTable); 716727f654740f2a26ad62a5c155af9199af9e69b889claireho coll->leadBytePermutationTable = NULL; 716827f654740f2a26ad62a5c155af9199af9e69b889claireho return; 716927f654740f2a26ad62a5c155af9199af9e69b889claireho } 717027f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodes = (int32_t*) uprv_malloc(reorderCodesLength * sizeof(int32_t)); 717127f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->reorderCodes == NULL) { 717227f654740f2a26ad62a5c155af9199af9e69b889claireho *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 717327f654740f2a26ad62a5c155af9199af9e69b889claireho return; 717427f654740f2a26ad62a5c155af9199af9e69b889claireho } 717527f654740f2a26ad62a5c155af9199af9e69b889claireho for (int32_t i = 0; i < reorderCodesLength; i++) { 
717627f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodes[i] = reorderCodes[i]; 717727f654740f2a26ad62a5c155af9199af9e69b889claireho } 717827f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodesLength = reorderCodesLength; 717927f654740f2a26ad62a5c155af9199af9e69b889claireho ucol_buildPermutationTable(coll, pErrorCode); 718027f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(*pErrorCode)) { 718127f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(coll->reorderCodes); 718227f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodes = NULL; 718327f654740f2a26ad62a5c155af9199af9e69b889claireho coll->reorderCodesLength = 0; 718427f654740f2a26ad62a5c155af9199af9e69b889claireho } 718527f654740f2a26ad62a5c155af9199af9e69b889claireho} 718627f654740f2a26ad62a5c155af9199af9e69b889claireho 718727f654740f2a26ad62a5c155af9199af9e69b889claireho 7188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 7189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are misc functions */ 7190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there are new APIs and some compatibility APIs */ 7191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 7192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 7194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getVersion(const UCollator* coll, 7195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVersionInfo versionInfo) 7196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 7197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* RunTime version */ 7198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t rtVersion = UCOL_RUNTIME_VERSION; 
7199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Builder version*/ 7200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t bdVersion = coll->image->version[0]; 7201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Charset Version. Need to get the version from cnv files 7203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * makeconv should populate cnv files with version and 7204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an api has to be provided in ucnv.h to obtain this version 7205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 7206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t csVersion = 0; 7207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* combine the version info */ 7209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t cmbVersion = (uint16_t)((rtVersion<<11) | (bdVersion<<6) | (csVersion)); 7210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Tailoring rules */ 7212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru versionInfo[0] = (uint8_t)(cmbVersion>>8); 7213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru versionInfo[1] = (uint8_t)cmbVersion; 7214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru versionInfo[2] = coll->image->version[1]; 7215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->UCA) { 7216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* Include the minor number when getting the UCA version. 
(major & 1f) << 3 | (minor & 7) */ 7217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru versionInfo[3] = (coll->UCA->image->UCAVersion[0] & 0x1f) << 3 | (coll->UCA->image->UCAVersion[1] & 0x07); 7218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru versionInfo[3] = 0; 7220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This internal API checks whether a character is tailored or not */ 7225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 7226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) { 7227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status) || coll == NULL || coll == coll->UCA) { 7228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 7229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t CE = UCOL_NOT_FOUND; 7232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *ContractionStart = NULL; 7233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(u < 0x100) { /* latin-1 */ 7234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = coll->latinOneMapping[u]; 7235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) { 7236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 7237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
7238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* regular */ 7239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u); 7240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isContraction(CE)) { 7243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ContractionStart = (UChar *)coll->image+getContractOffset(CE); 7244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex)); 7245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(CE != UCOL_NOT_FOUND); 7248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 7252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the string compare functions */ 7253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 7254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/ 7255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_checkIdent internal function. Does byte level string compare. 
*/ 7258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Used by strcoll if strength == identical and strings */ 725950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* are otherwise equal. */ 7260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 7261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Comparison must be done on NFD normalized strings. */ 7262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* FCD is not good enough. */ 7263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 7265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status) 7266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 726750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // When we arrive here, we can have normal strings or UCharIterators. Currently they are both 726850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of same type, but that doesn't really mean that it will stay that way. 7269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t comparison; 7270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sColl->flags & UCOL_USE_ITERATOR) { 727250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The division for the array length may truncate the array size to 727350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high 727450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // for all platforms anyway. 
727550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 727650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 7277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UNormIterator *sNIt = NULL, *tNIt = NULL; 7278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status); 7279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status); 7280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl->iterator->move(sColl->iterator, 0, UITER_START); 7281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl->iterator->move(tColl->iterator, 0, UITER_START); 7282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, status); 7283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, status); 7284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru comparison = u_strCompareIter(sIt, tIt, TRUE); 7285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(sNIt); 7286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(tNIt); 7287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 728850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sLen = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp - sColl->string) : -1; 728950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *sBuf = sColl->string; 729050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t tLen = (tColl->flags & UCOL_ITER_HASLEN) ? 
(int32_t)(tColl->endp - tColl->string) : -1; 729150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *tBuf = tColl->string; 7292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (normalize) { 7294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ZERO_ERROR; 729550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note: We could use Normalizer::compare() or similar, but for short strings 729650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // which may not be in FCD it might be faster to just NFD them. 729750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather than 729850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // NFD'ing immediately might be faster for long strings, 729950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // but string comparison is usually done on relatively short strings. 730050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN) == 0, sBuf, sLen), 730150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sColl->writableBuffer, 730250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status); 730350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN) == 0, tBuf, tLen), 730450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tColl->writableBuffer, 730550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status); 730650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 730750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_LESS; 7308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 730950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writableBuffer); 7310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
731150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE); 7312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (comparison < 0) { 7316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_LESS; 7317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (comparison == 0) { 7318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_EQUAL; 7319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* comparison > 0 */ { 7320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_GREATER; 7321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* CEBuf - A struct and some inline functions to handle the saving */ 7325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* of CEs in a buffer within ucol_strcoll */ 7326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCOL_CEBUF_SIZE 512 7328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct ucol_CEBuf { 7329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *buf; 7330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *endp; 7331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *pos; 7332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t localArray[UCOL_CEBUF_SIZE]; 7333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} ucol_CEBuf; 7334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
7335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 7337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void UCOL_INIT_CEBUF(ucol_CEBuf *b) { 7338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (b)->buf = (b)->pos = (b)->localArray; 7339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (b)->endp = (b)->buf + UCOL_CEBUF_SIZE; 7340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 7343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) { 7344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t oldSize; 7345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newSize; 7346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *newBuf; 7347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ci->flags |= UCOL_ITER_ALLOCATED; 734950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oldSize = (uint32_t)(b->pos - b->buf); 7350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newSize = oldSize * 2; 7351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t)); 7352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(newBuf == NULL) { 7353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 7354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 7356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t)); 
7357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (b->buf != b->localArray) { 7358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(b->buf); 7359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru b->buf = newBuf; 7361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru b->endp = b->buf + newSize; 7362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru b->pos = b->buf + oldSize; 7363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 7367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruinline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCode *status) { 7368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b->pos == b->endp) { 7369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_CEBuf_Expand(b, ci, status); 7370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_SUCCESS(*status)) { 7372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(b)->pos++ = ce; 7373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is a trick string compare function that goes in and uses sortkeys to compare */ 7377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* It is used when compare gets in trouble and needs to bail out */ 7378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UCollationResult ucol_compareUsingSortKeys(collIterate *sColl, 
7379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru collIterate *tColl, 7380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 7381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 7382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER]; 7383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *sourceKeyP = sourceKey; 7384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *targetKeyP = targetKey; 7385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceKeyLen = UCOL_MAX_BUFFER, targetKeyLen = UCOL_MAX_BUFFER; 7386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *coll = sColl->coll; 738750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *source = NULL; 738850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *target = NULL; 7389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = UCOL_EQUAL; 739050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString sourceString, targetString; 739150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t sourceLength; 739250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t targetLength; 7393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sColl->flags & UCOL_USE_ITERATOR) { 7395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sColl->iterator->move(sColl->iterator, 0, UITER_START); 7396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tColl->iterator->move(tColl->iterator, 0, UITER_START); 739750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 739850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((c=sColl->iterator->next(sColl->iterator))>=0) { 739950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sourceString.append((UChar)c); 
740050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 740150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((c=tColl->iterator->next(tColl->iterator))>=0) { 740250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho targetString.append((UChar)c); 740350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 740450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho source = sourceString.getBuffer(); 740550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sourceLength = sourceString.length(); 740650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho target = targetString.getBuffer(); 740750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho targetLength = targetString.length(); 7408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // no iterators 740950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sColl->string):-1; 741050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tColl->string):-1; 7411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = sColl->string; 7412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru target = tColl->string; 7413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen); 7418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sourceKeyLen > UCOL_MAX_BUFFER) { 7419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sourceKeyP = (uint8_t*)uprv_malloc(sourceKeyLen*sizeof(uint8_t)); 7420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sourceKeyP == NULL) { 7421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
*status = U_MEMORY_ALLOCATION_ERROR; 7422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto cleanup_and_do_compare; 7423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen); 7425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen); 7428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(targetKeyLen > UCOL_MAX_BUFFER) { 7429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targetKeyP = (uint8_t*)uprv_malloc(targetKeyLen*sizeof(uint8_t)); 7430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(targetKeyP == NULL) { 7431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 7432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto cleanup_and_do_compare; 7433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen); 7435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = uprv_strcmp((const char*)sourceKeyP, (const char*)targetKeyP); 7438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querucleanup_and_do_compare: 7440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sourceKeyP != NULL && sourceKeyP != sourceKey) { 7441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(sourceKeyP); 
7442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(targetKeyP != NULL && targetKeyP != targetKey) { 7445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(targetKeyP); 7446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result<0) { 7449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_LESS; 7450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(result>0) { 7451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_GREATER; 7452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_EQUAL; 7454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 745850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult 745950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status) 7460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 7461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ALIGN_CODE(16); 7462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *coll = sColl->coll; 7464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // setting up the collator parameters 
7467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColAttributeValue strength = coll->strength; 7468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool initialCheckSecTer = (strength >= UCOL_SECONDARY); 7469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkSecTer = initialCheckSecTer; 7471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkTertiary = (strength >= UCOL_TERTIARY); 7472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkQuad = (strength >= UCOL_QUATERNARY); 7473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkIdent = (strength == UCOL_IDENTICAL); 7474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool checkCase = (coll->caseLevel == UCOL_ON); 7475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer; 7476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool shifted = (coll->alternateHandling == UCOL_SHIFTED); 7477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool qShifted = shifted && checkQuad; 7478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad; 7479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(doHiragana && shifted) { 7481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (ucol_compareUsingSortKeys(sColl, tColl, status)); 7482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t caseSwitch = coll->caseSwitch; 7484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t tertiaryMask = coll->tertiaryMask; 7485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
7486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is the lowest primary value that will not be ignored if shifted 7487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0; 7488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationResult result = UCOL_EQUAL; 7490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationResult hirResult = UCOL_EQUAL; 7491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Preparing the CE buffers. They will be filled during the primary phase 7493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_CEBuf sCEs; 7494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_CEBuf tCEs; 7495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_INIT_CEBUF(&sCEs); 7496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_INIT_CEBUF(&tCEs); 7497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t secS = 0, secT = 0; 7499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sOrder=0, tOrder=0; 7500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Non shifted primary processing is quite simple 7502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!shifted) { 7503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 7504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We fetch CEs until we hit a non ignorable primary or end. 
7506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 7507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We get the next CE 7508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_IGetNextCE(coll, sColl, status); 7509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Stuff it in the buffer 7510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // And keep just the primary part. 7512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sOrder &= UCOL_PRIMARYMASK; 7513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while(sOrder == 0); 7514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // see the comments on the above block 7516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 7517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_IGetNextCE(coll, tColl, status); 7518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder &= UCOL_PRIMARYMASK; 7520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while(tOrder == 0); 7521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if both primaries are the same 7523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 7524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // and there are no more CEs, we advance to the next level 7525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == UCOL_NO_MORE_CES_PRIMARY) { 7526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
7528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(doHiragana && hirResult == UCOL_EQUAL) { 7529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) { 7530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA)) 7531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ? UCOL_LESS:UCOL_GREATER; 7532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 753527f654740f2a26ad62a5c155af9199af9e69b889claireho // only need to check one for continuation 753627f654740f2a26ad62a5c155af9199af9e69b889claireho // if one is then the other must be or the preceding CE would be a prefix of the other 753727f654740f2a26ad62a5c155af9199af9e69b889claireho if (coll->leadBytePermutationTable != NULL && !isContinuation(sOrder)) { 753827f654740f2a26ad62a5c155af9199af9e69b889claireho sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF); 753927f654740f2a26ad62a5c155af9199af9e69b889claireho tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF); 754027f654740f2a26ad62a5c155af9199af9e69b889claireho } 7541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if two primaries are different, we are done 7542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (sOrder < tOrder) ? UCOL_LESS: UCOL_GREATER; 7543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } // no primary difference... 
do the rest from the buffers 7546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // shifted - do a slightly more complicated processing :) 7547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 7548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool sInShifted = FALSE; 7549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool tInShifted = FALSE; 7550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This version of code can be refactored. However, it seems easier to understand this way. 7551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Source loop. Sam as the target loop. 7552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_IGetNextCE(coll, sColl, status); 7554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == UCOL_NO_MORE_CES) { 7555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) { 7558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* UCA amendment - ignore ignorables that follow shifted code points */ 7559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(isContinuation(sOrder)) { 7561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */ 7562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sInShifted) { 7563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */ 
7564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* Just lower level values */ 7571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sInShifted) { 7572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* regular */ 757927f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->leadBytePermutationTable != NULL){ 758027f654740f2a26ad62a5c155af9199af9e69b889claireho sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF); 758127f654740f2a26ad62a5c155af9199af9e69b889claireho } 7582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sOrder & UCOL_PRIMARYMASK) > LVT) { 7583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sOrder & UCOL_PRIMARYMASK) > 0) { 
7587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = TRUE; 7588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder &= UCOL_PRIMARYMASK; 7589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); 7593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = FALSE; 7594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder &= UCOL_PRIMARYMASK; 7600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = FALSE; 7601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 7602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_IGetNextCE(coll, tColl, status); 7604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder == UCOL_NO_MORE_CES) { 7605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) { 7608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* UCA amendment - ignore ignorables that follow shifted code points */ 
7609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(isContinuation(tOrder)) { 7611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */ 7612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tInShifted) { 7613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */ 7614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* Just lower level values */ 7621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tInShifted) { 7622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* regular */ 762927f654740f2a26ad62a5c155af9199af9e69b889claireho if(coll->leadBytePermutationTable != NULL){ 763027f654740f2a26ad62a5c155af9199af9e69b889claireho tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 
0x00FFFFFF); 763127f654740f2a26ad62a5c155af9199af9e69b889claireho } 7632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tOrder & UCOL_PRIMARYMASK) > LVT) { 7633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((tOrder & UCOL_PRIMARYMASK) > 0) { 7637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = TRUE; 7638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder &= UCOL_PRIMARYMASK; 7639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); 7643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = FALSE; 7644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder &= UCOL_PRIMARYMASK; 7650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = FALSE; 7651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 7653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 7654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(doHiragana && hirResult == UCOL_EQUAL) { 
7655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_HIRAGANA)) { 7656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA)) 7657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ? UCOL_LESS:UCOL_GREATER; 7658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 7661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == UCOL_NO_MORE_CES_PRIMARY) { 7662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = 0; 7665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = 0; 7666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 7669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER; 7670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } /* no primary difference... 
do the rest from the buffers */ 7673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* now, we're gonna reexamine collected CEs */ 7676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *sCE; 7677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *tCE; 7678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* This is the secondary level of comparison */ 7680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(checkSecTer) { 7681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isFrenchSec) { /* normal */ 7682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.buf; 7683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.buf; 7684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (secS == 0) { 7686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE++) & UCOL_SECONDARYMASK; 7687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(secT == 0) { 7690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE++) & UCOL_SECONDARYMASK; 7691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == secT) { 7694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == UCOL_NO_MORE_CES_SECONDARY) { 7695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
7697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; secT = 0; 7698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (secS < secT) ? UCOL_LESS : UCOL_GREATER; 7702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { /* do the French */ 7706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *sCESave = NULL; 7707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *tCESave = NULL; 7708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.pos-2; /* this could also be sCEs-- if needs to be optimized */ 7709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.pos-2; 7710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (secS == 0 && sCE >= sCEs.buf) { 771227f654740f2a26ad62a5c155af9199af9e69b889claireho if(sCESave == NULL) { 7713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE--); 7714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(secS)) { 7715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(isContinuation(secS = *(sCE--))) 7716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ; 7717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* after this, secS has the start of continuation, and sCEs points before that */ 7718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCESave = sCE; /* we save it, so that we know where to come back AND that 
we need to go forward */ 7719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE+=2; /* need to point to the first continuation CP */ 7720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* However, now you can just continue doing stuff */ 7721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE++); 7724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(secS)) { /* This means we have finished with this cont */ 7725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCESave; /* reset the pointer to before continuation */ 772627f654740f2a26ad62a5c155af9199af9e69b889claireho sCESave = NULL; 772727f654740f2a26ad62a5c155af9199af9e69b889claireho secS = 0; /* Fetch a fresh CE before the continuation sequence. */ 7728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS &= UCOL_SECONDARYMASK; /* remove the continuation bit */ 7732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(secT == 0 && tCE >= tCEs.buf) { 773527f654740f2a26ad62a5c155af9199af9e69b889claireho if(tCESave == NULL) { 7736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE--); 7737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(secT)) { 7738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(isContinuation(secT = *(tCE--))) 7739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ; 7740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* after this, 
secS has the start of continuation, and sCEs points before that */ 7741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCESave = tCE; /* we save it, so that we know where to come back AND that we need to go forward */ 7742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE+=2; /* need to point to the first continuation CP */ 7743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* However, now you can just continue doing stuff */ 7744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE++); 7747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(secT)) { /* This means we have finished with this cont */ 7748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCESave; /* reset the pointer to before continuation */ 774927f654740f2a26ad62a5c155af9199af9e69b889claireho tCESave = NULL; 775027f654740f2a26ad62a5c155af9199af9e69b889claireho secT = 0; /* Fetch a fresh CE before the continuation sequence. 
*/ 7751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT &= UCOL_SECONDARYMASK; /* remove the continuation bit */ 7755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == secT) { 7758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == UCOL_NO_MORE_CES_SECONDARY || (sCE < sCEs.buf && tCE < tCEs.buf)) { 7759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; secT = 0; 7762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (secS < secT) ? 
UCOL_LESS : UCOL_GREATER; 7766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* doing the case bit */ 7773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(checkCase) { 7774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.buf; 7775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.buf; 7776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((secS & UCOL_REMOVE_CASE) == 0) { 7778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(*sCE++)) { 7779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS =*(sCE-1); 7780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((secS & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) { 7781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // primary ignorables should not be considered on the case level when the strength is primary 7782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise, the CEs stop being well-formed 7783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS &= UCOL_TERT_CASE_MASK; 7784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS ^= caseSwitch; 7785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
7789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((secT & UCOL_REMOVE_CASE) == 0) { 7794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(*tCE++)) { 7795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE-1); 7796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((secT & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) { 7797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // primary ignorables should not be considered on the case level when the strength is primary 7798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // otherwise, the CEs stop being well-formed 7799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT &= UCOL_TERT_CASE_MASK; 7800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT ^= caseSwitch; 7801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) { 7810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = UCOL_LESS; 7811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 
7812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK)) { 7813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = UCOL_GREATER; 7814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY || (secT & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) { 7818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Tertiary level */ 7827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(checkTertiary) { 7828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.buf; 7831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.buf; 7832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((secS & UCOL_REMOVE_CASE) == 0) { 7834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE++) & tertiaryMask; 7835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(!isContinuation(secS)) { 7836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS ^= caseSwitch; 7837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS &= UCOL_REMOVE_CASE; 7839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((secT & UCOL_REMOVE_CASE) == 0) { 7843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE++) & tertiaryMask; 7844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isContinuation(secT)) { 7845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT ^= caseSwitch; 7846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT &= UCOL_REMOVE_CASE; 7848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == secT) { 7852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((secS & UCOL_REMOVE_CASE) == 1) { 7853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; secT = 0; 7856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (secS < secT) ? 
UCOL_LESS : UCOL_GREATER; 7860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(qShifted /*checkQuad*/) { 7867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool sInShifted = TRUE; 7868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool tInShifted = TRUE; 7869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; 7870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = 0; 7871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sCE = sCEs.buf; 7872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tCE = tCEs.buf; 7873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 787427f654740f2a26ad62a5c155af9199af9e69b889claireho while((secS == 0 && secS != UCOL_NO_MORE_CES) || (isContinuation(secS) && !sInShifted)) { 7875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = *(sCE++); 7876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(secS)) { 7877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!sInShifted) { 7878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(secS > LVT || (secS & UCOL_PRIMARYMASK) == 0) { /* non continuation */ 7881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = UCOL_PRIMARYMASK; 7882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = FALSE; 
7883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sInShifted = TRUE; 7885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS &= UCOL_PRIMARYMASK; 7888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 789027f654740f2a26ad62a5c155af9199af9e69b889claireho while((secT == 0 && secT != UCOL_NO_MORE_CES) || (isContinuation(secT) && !tInShifted)) { 7891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = *(tCE++); 7892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isContinuation(secT)) { 7893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!tInShifted) { 7894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(secT > LVT || (secT & UCOL_PRIMARYMASK) == 0) { 7897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT = UCOL_PRIMARYMASK; 7898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = FALSE; 7899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tInShifted = TRUE; 7901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secT &= UCOL_PRIMARYMASK; 7904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == secT) { 7906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(secS == UCOL_NO_MORE_CES_PRIMARY) { 
7907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 7908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru secS = 0; secT = 0; 7910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 7911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = (secS < secT) ? UCOL_LESS : UCOL_GREATER; 7914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(doHiragana && hirResult != UCOL_EQUAL) { 7918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If we're fine on quaternaries, we might be different 7919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // on Hiragana. This, however, might fail us in shifted. 7920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = hirResult; 7921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto commonReturn; 7922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* For IDENTICAL comparisons, we use a bitwise character comparison */ 7925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* as a tiebreaker if all else is equal. */ 7926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Getting here should be quite rare - strings are not identical - */ 7927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* that is checked first, but compared == through all other checks. 
*/ 7928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(checkIdent) 7929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 7930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //result = ucol_checkIdent(&sColl, &tColl, coll->normalizationMode == UCOL_ON); 7931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ucol_checkIdent(sColl, tColl, TRUE, status); 7932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucommonReturn: 7935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((sColl->flags | tColl->flags) & UCOL_ITER_ALLOCATED) { 7936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sCEs.buf != sCEs.localArray ) { 7937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(sCEs.buf); 7938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (tCEs.buf != tCEs.localArray ) { 7940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(tCEs.buf); 7941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 7945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 7946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 794750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult 794850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoucol_strcollRegular(const UCollator *coll, 794950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *source, int32_t sourceLength, 795050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *target, int32_t targetLength, 795150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
UErrorCode *status) { 795250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collIterate sColl, tColl; 795350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Preparing the context objects for iterating over strings 795450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, source, sourceLength, &sColl, status); 795550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, target, targetLength, &tColl, status); 795650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 795750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_LESS; 795850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 795950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(&sColl, &tColl, status); 796050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 7961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline uint32_t 7963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getLatinOneContraction(const UCollator *coll, int32_t strength, 7964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t CE, const UChar *s, int32_t *index, int32_t len) 7965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 7966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF); 7967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t latinOneOffset = (CE & 0x00FFF000) >> 12; 7968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t offset = 1; 7969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar schar = 0, tchar = 0; 7970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 7972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(len == -1) { 7973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(s[*index] == 0) { // end of string 7974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); 7975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = s[*index]; 7977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(*index == len) { 7980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); 7981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 7982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru schar = s[*index]; 7983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(schar > (tchar = *(UCharOffset+offset))) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ 7987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru offset++; 7988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (schar == tchar) { 7991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (*index)++; 7992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset+offset]); 7993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 7995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 
7996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(schar & 0xFF00 /*> UCOL_ENDOFLATIN1RANGE*/) { 7997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_BAIL_OUT_CE; 7998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 7999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // skip completely ignorables 8000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar); 8001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isZeroCE == 0) { // we have to ignore completely ignorables 8002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (*index)++; 8003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 8004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); 8007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 8013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is a fast strcoll, geared towards text in Latin-1. 8014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It supports contractions of size two, French secondaries 8015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and case switching. You can use it with strengths primary 8016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to tertiary. It does not support shifted and case level. 
8017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It relies on the table build by setupLatin1Table. If it 8018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * doesn't understand something, it will go to the regular 8019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * strcoll. 8020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 802150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult 8022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcollUseLatin1( const UCollator *coll, 8023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sLen, 8025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tLen, 8027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 8028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 8029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ALIGN_CODE(16); 8030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t strength = coll->strength; 8031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sIndex = 0, tIndex = 0; 8033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar sChar = 0, tChar = 0; 8034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sOrder=0, tOrder=0; 8035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool endOfSource = FALSE; 8037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *elements = coll->latinOneCEs; 8039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
8040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool haveContractions = FALSE; // if we have contractions in our string 8041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we cannot do French secondary 8042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do the primary level 8044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 8045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(sOrder==0) { // this loop skips primary ignorables 8046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // sOrder=getNextlatinOneCE(source); 8047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sLen==-1) { // handling zero terminated strings 8048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[sIndex++]; 8049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sChar==0) { 8050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 8051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // handling strings with known length 8054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIndex==sLen) { 8055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 8056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[sIndex++]; 8059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32) 8061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
//fprintf(stderr, "R"); 806250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 8063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sOrder = elements[sChar]; 8065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder >= UCOL_NOT_FOUND) { // if we got a special 8066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // specials can basically be either contractions or bail-out signs. If we get anything 8067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // else, we'll bail out anywasy 8068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(getCETag(sOrder) == CONTRACTION_TAG) { 8069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOrder, source, &sIndex, sLen); 8070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru haveContractions = TRUE; // if there are contractions, we cannot do French secondary 8071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // However, if there are contractions in the table, but we always use just one char, 8072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // we might be able to do French. This should be checked out. 
8073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { 8075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "S"); 807650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 8077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(tOrder==0) { // this loop skips primary ignorables 8082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // tOrder=getNextlatinOneCE(target); 8083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tLen==-1) { // handling zero terminated strings 8084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[tIndex++]; 8085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tChar==0) { 8086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { // this is different than source loop, 8087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // as we already know that source loop is done here, 8088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // so we can either finish the primary loop if both 8089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // strings are done or anounce the result if only 8090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // target is done. Same below. 
8091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto endOfPrimLoop; 8092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 8093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // handling strings with known length 8097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tIndex==tLen) { 8098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 8099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto endOfPrimLoop; 8100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 8101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[tIndex++]; 8105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32) 8107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "R"); 810850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 8109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tOrder = elements[tChar]; 8111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder >= UCOL_NOT_FOUND) { 8112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Handling specials, see the comments for source 8113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if(getCETag(tOrder) == CONTRACTION_TAG) { 8114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOrder, target, &tIndex, tLen); 8115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru haveContractions = TRUE; 8116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { 8118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "S"); 811950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 8120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { // source is finished, but target is not, say the result. 
8124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 8125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { // if we have same CEs, we continue the loop 8128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sOrder = 0; tOrder = 0; 8129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 8130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 8131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // compare current top bytes 8132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(((sOrder^tOrder)&0xFF000000)!=0) { 8133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // top bytes differ, return difference 8134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder < tOrder) { 8135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 8136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder > tOrder) { 8137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24); 8140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // since we must return enum value 8141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // top bytes match, continue with following bytes 8144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sOrder<<=8; 8145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tOrder<<=8; 8146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
8147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruendOfPrimLoop: 8150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // after primary loop, we definitely know the sizes of strings, 8151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // so we set it and use simpler loop for secondaries and tertiaries 8152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sLen = sIndex; tLen = tIndex; 8153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(strength >= UCOL_SECONDARY) { 8154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // adjust the table beggining 8155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elements += coll->latinOneTableLen; 8156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = FALSE; 8157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll->frenchCollation == UCOL_OFF) { // non French 8159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This loop is a simplified copy of primary loop 8160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // at this point we know that whole strings are latin-1, so we don't 8161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // check for that. We also know that we only have contractions as 8162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // specials. 
8163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sIndex = 0; tIndex = 0; 8164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 8165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(sOrder==0) { 8166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIndex==sLen) { 8167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 8168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[sIndex++]; 8171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = elements[sChar]; 8172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder > UCOL_NOT_FOUND) { 8173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, sOrder, source, &sIndex, sLen); 8174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(tOrder==0) { 8178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tIndex==tLen) { 8179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 8180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto endOfSecLoop; 8181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 8182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[tIndex++]; 8186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = elements[tChar]; 
8187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder > UCOL_NOT_FOUND) { 8188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, tOrder, target, &tIndex, tLen); 8189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 8192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 8193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 8196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = 0; tOrder = 0; 8197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 8198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 8199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // see primary loop for comments on this 8200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((sOrder^tOrder)&0xFF000000)!=0) { 8201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder < tOrder) { 8202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 8203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder > tOrder) { 8204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder<<=8; 8208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder<<=8; 8209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
8211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // French 8212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(haveContractions) { // if we have contractions, we have to bail out 8213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // since we don't really know how to handle them here 821450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ucol_strcollRegular(coll, source, sLen, target, tLen, status); 8215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // For French, we go backwards 8217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sIndex = sLen; tIndex = tLen; 8218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 8219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(sOrder==0) { 8220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIndex==0) { 8221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 8222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[--sIndex]; 8225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = elements[sChar]; 8226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // don't even look for contractions 8227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(tOrder==0) { 8230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tIndex==0) { 8231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 8232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto endOfSecLoop; 8233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 
8234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[--tIndex]; 8238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = elements[tChar]; 8239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // don't even look for contractions 8240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 8242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 8243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 8246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = 0; tOrder = 0; 8247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 8248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 8249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // see the primary loop for comments 8250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((sOrder^tOrder)&0xFF000000)!=0) { 8251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder < tOrder) { 8252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 8253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder > tOrder) { 8254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder<<=8; 8258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
tOrder<<=8; 8259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruendOfSecLoop: 8265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strength >= UCOL_TERTIARY) { 8266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // tertiary loop is the same as secondary (except no French) 8267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elements += coll->latinOneTableLen; 8268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sIndex = 0; tIndex = 0; 8269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = FALSE; 8270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 8271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(sOrder==0) { 8272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIndex==sLen) { 8273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru endOfSource = TRUE; 8274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar=source[sIndex++]; 8277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = elements[sChar]; 8278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder > UCOL_NOT_FOUND) { 8279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sOrder, source, &sIndex, sLen); 8280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
while(tOrder==0) { 8283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tIndex==tLen) { 8284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(endOfSource) { 8285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; // if both strings are at the end, they are equal 8286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 8287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar=target[tIndex++]; 8291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = elements[tChar]; 8292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tOrder > UCOL_NOT_FOUND) { 8293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tOrder, target, &tIndex, tLen); 8294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(endOfSource) { 8297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 8298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder == tOrder) { 8300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder = 0; tOrder = 0; 8301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 8302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 8303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(((sOrder^tOrder)&0xff000000)!=0) { 8304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sOrder < tOrder) { 8305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_LESS; 
8306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(sOrder > tOrder) { 8307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_GREATER; 8308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sOrder<<=8; 8311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tOrder<<=8; 8312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_EQUAL; 8316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationResult U_EXPORT2 8320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcollIter( const UCollator *coll, 8321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharIterator *sIter, 8322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharIterator *tIter, 8323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) 8324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 8325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!status || U_FAILURE(*status)) { 8326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER); 8330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", 
coll, sIter, tIter); 8331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (sIter == tIter) { 8333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) 8334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sIter == NULL || tIter == NULL || coll == NULL) { 8337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 8338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) 8339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationResult result = UCOL_EQUAL; 8343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Preparing the context objects for iterating over strings 8345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru collIterate sColl, tColl; 834650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, NULL, -1, &sColl, status); 834750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IInit_collIterate(coll, NULL, -1, &tColl, status); 834850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*status)) { 834950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) 835050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UCOL_EQUAL; 835150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 8352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The division for the array length may truncate the array size to 
8353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high 8354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // for all platforms anyway. 8355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 8356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; 8357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UNormIterator *sNormIter = NULL, *tNormIter = NULL; 8358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl.iterator = sIter; 8360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl.flags |= UCOL_USE_ITERATOR; 8361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl.flags |= UCOL_USE_ITERATOR; 8362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl.iterator = tIter; 8363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) { 8365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status); 8366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status); 8367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sColl.flags &= ~UCOL_ITER_NORM; 8368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status); 8370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status); 
8371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tColl.flags &= ~UCOL_ITER_NORM; 8372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL; 8375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((sChar = sColl.iterator->next(sColl.iterator)) == 8377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (tChar = tColl.iterator->next(tColl.iterator))) { 8378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sChar == U_SENTINEL) { 8379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = UCOL_EQUAL; 8380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto end_compare; 8381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sChar == U_SENTINEL) { 8385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar = tColl.iterator->previous(tColl.iterator); 8386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(tChar == U_SENTINEL) { 8389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sChar = sColl.iterator->previous(sColl.iterator); 8390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar = sColl.iterator->previous(sColl.iterator); 8393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar = tColl.iterator->previous(tColl.iterator); 
8394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ucol_unsafeCP((UChar)sChar, coll) || ucol_unsafeCP((UChar)tChar, coll)) 8396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 8397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We are stopped in the middle of a contraction. 8398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Scan backwards through the == part of the string looking for the start of the contraction. 8399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // It doesn't matter which string we scan, since they are the same in this region. 8400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do 8401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 8402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sChar = sColl.iterator->previous(sColl.iterator); 8403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tChar = tColl.iterator->previous(tColl.iterator); 8404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll)); 8406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_SUCCESS(*status)) { 8410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = ucol_strcollRegular(&sColl, &tColl, status); 8411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruend_compare: 8414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(sNormIter || tNormIter) { 
8415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(sNormIter); 8416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unorm_closeIter(tNormIter); 8417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE_STATUS(result, *status) 8420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 8421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 8425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_strcoll Main public API string comparison function */ 8426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* */ 8427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationResult U_EXPORT2 8428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcoll( const UCollator *coll, 8429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 8431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t targetLength) 8433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 8434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ALIGN_CODE(16); 8435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_ENTRY(UTRACE_UCOL_STRCOLL); 8437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 8438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA3(UTRACE_VERBOSE, 
"coll=%p, source=%p, target=%p", coll, source, target); 8439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength); 8440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength); 8441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(source == NULL || target == NULL) { 8444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // do not crash, but return. Should have 8445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // status argument to return error. 8446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE(UCOL_EQUAL); 8447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Quick check if source and target are same strings. */ 8451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* They should either both be NULL terminated or the explicit length should be set on both. */ 8452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (source==target && sourceLength==targetLength) { 8453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE(UCOL_EQUAL); 8454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Scan the strings. 
Find: */ 8458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* The length of any leading portion that is equal */ 8459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Whether they are exactly equal. (in which case we just return) */ 8460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrc = source; 8461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pTarg = target; 8462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t equalLength; 8463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sourceLength == -1 && targetLength == -1) { 8465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Both strings are null terminated. 8466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Scan through any leading equal portion. 8467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (*pSrc == *pTarg && *pSrc != 0) { 8468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc++; 8469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pTarg++; 8470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pSrc == 0 && *pTarg == 0) { 8472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_VALUE(UCOL_EQUAL); 8473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_EQUAL; 8474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 847550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho equalLength = (int32_t)(pSrc - source); 8476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 8478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 8479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // One or both strings has an explicit length. 
8480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrcEnd = source + sourceLength; 8481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pTargEnd = target + targetLength; 8482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan while the strings are bitwise ==, or until one is exhausted. 8484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 8485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (pSrc == pSrcEnd || pTarg == pTargEnd) { 8486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLength == -1)) { 8489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (*pSrc != *pTarg) { 8492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 8493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pSrc++; 8495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pTarg++; 8496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 849750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho equalLength = (int32_t)(pSrc - source); 8498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If we made it all the way through both strings, we are done. They are == 8500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((pSrc ==pSrcEnd || (pSrcEnd <pSrc && *pSrc==0)) && /* At end of src string, however it was specified. 
*/ 8501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0))) /* and also at end of dest string */ 8502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 8503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTRACE_EXIT_VALUE(UCOL_EQUAL); 8504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_EQUAL; 8505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 8506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (equalLength > 0) { 8508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* There is an identical portion at the beginning of the two strings. */ 8509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* If the identical portion ends within a contraction or a comibining */ 8510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* character sequence, back up to the start of that sequence. */ 8511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // These values should already be set by the code above. 8513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //pSrc = source + equalLength; /* point to the first differing chars */ 8514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //pTarg = target + equalLength; 851527f654740f2a26ad62a5c155af9199af9e69b889claireho if ((pSrc != source+sourceLength && ucol_unsafeCP(*pSrc, coll)) || 851627f654740f2a26ad62a5c155af9199af9e69b889claireho (pTarg != target+targetLength && ucol_unsafeCP(*pTarg, coll))) 8517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 8518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are stopped in the middle of a contraction. 
8519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan backwards through the == part of the string looking for the start of the contraction. 8520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // It doesn't matter which string we scan, since they are the same in this region. 8521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do 8522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 8523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru equalLength--; 8524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc--; 8525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (equalLength>0 && ucol_unsafeCP(*pSrc, coll)); 8527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source += equalLength; 8530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru target += equalLength; 8531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sourceLength > 0) { 8532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sourceLength -= equalLength; 8533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (targetLength > 0) { 8535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targetLength -= equalLength; 8536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 8540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationResult returnVal; 8541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!coll->latinOneUse || (sourceLength > 0 && 
*source&0xff00) || (targetLength > 0 && *target&0xff00)) { 854250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targetLength, &status); 8543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 8544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status); 8545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRACE_EXIT_VALUE(returnVal); 8547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return returnVal; 8548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */ 8551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 8552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_greater( const UCollator *coll, 8553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 8555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t targetLength) 8557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 8558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 8559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru == UCOL_GREATER); 8560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */ 
8563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 8564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_greaterOrEqual( const UCollator *coll, 8565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 8567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t targetLength) 8569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 8570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 8571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru != UCOL_LESS); 8572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */ 8575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 8576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_equal( const UCollator *coll, 8577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *source, 8578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sourceLength, 8579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *target, 8580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t targetLength) 8581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 8582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 8583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru == UCOL_EQUAL); 8584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
8586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 8587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getUCAVersion(const UCollator* coll, UVersionInfo info) { 8588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(coll && coll->UCA) { 8589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo)); 8590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 8592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */ 8594