1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   Copyright (C) 1996-2010, International Business Machines
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   file name:  ucol.cpp
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   encoding:   US-ASCII
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   tab size:   8 (not used)
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   indentation:4
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Modification history
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date        Name      Comments
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 1996-1999   various members of ICU team maintained C API for collation framework
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 02/16/2001  synwee    Added internal method getPrevSpecialCE
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/01/2001  synwee    Added maxexpansion functionality.
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/coleitr.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unorm.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_imp.h"
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "bocsu.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unorm_it.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h"
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "utracimp.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "putilimp.h"
39c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "uassert.h"
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LAST_BYTE_MASK_           0xFF
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SECOND_LAST_BYTE_SHIFT_   8
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define ZERO_CC_LIMIT_            0xC0
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// this is static pointer to the normalizer fcdTrieIndex
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// it is always the same between calls to u_cleanup
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and therefore writing to it is not synchronized.
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// It is cleaned in ucol_cleanup
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const uint16_t *fcdTrieIndex=NULL;
59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Code points at fcdHighStart and above have a zero FCD value.
60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic UChar32 fcdHighStart = 0;
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// These are values from the UCA that are required for
// implicit generation and for suppressing sort key compression.
// They should normally come from the UCA data, but if one
// is running without the UCA, that could be a problem.
6627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const int32_t maxRegularPrimary  = 0x7A;
67c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t minImplicitPrimary = 0xE0;
68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t maxImplicitPrimary = 0xE4;
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool U_CALLCONV
72c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruucol_cleanup(void)
73c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
74c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fcdTrieIndex = NULL;
75c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return TRUE;
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t U_CALLCONV
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_getFoldingOffset(uint32_t data) {
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (int32_t)(data&0xFFFFFF);
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8527f654740f2a26ad62a5c155af9199af9e69b889claireho// init FCD data
8627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline
8727f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool initializeFCD(UErrorCode *status) {
8827f654740f2a26ad62a5c155af9199af9e69b889claireho    if (fcdTrieIndex != NULL) {
8927f654740f2a26ad62a5c155af9199af9e69b889claireho        return TRUE;
9027f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
9127f654740f2a26ad62a5c155af9199af9e69b889claireho        // The result is constant, until the library is reloaded.
9227f654740f2a26ad62a5c155af9199af9e69b889claireho        fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
9327f654740f2a26ad62a5c155af9199af9e69b889claireho        ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
9427f654740f2a26ad62a5c155af9199af9e69b889claireho        return U_SUCCESS(*status);
9527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
9627f654740f2a26ad62a5c155af9199af9e69b889claireho}
9727f654740f2a26ad62a5c155af9199af9e69b889claireho
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
99c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruinline void IInit_collIterate(const UCollator *collator, const UChar *sourceString,
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                              int32_t sourceLen, collIterate *s,
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                              UErrorCode *status)
102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    (s)->string = (s)->pos = sourceString;
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->origFlags = 0;
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->flags = 0;
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sourceLen >= 0) {
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s->flags |= UCOL_ITER_HASLEN;
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (s)->endp = (UChar *)sourceString+sourceLen;
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* change to enable easier checking for end of string for fcdpositon */
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (s)->endp = NULL;
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->extendCEs = NULL;
115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->extendCEsSize = 0;
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->CEpos = (s)->toReturn = (s)->CEs;
117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->offsetBuffer = NULL;
118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->offsetBufferSize = 0;
119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->offsetReturn = (s)->offsetStore = NULL;
120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->offsetRepeatCount = (s)->offsetRepeatValue = 0;
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->coll = (collator);
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    (s)->nfd = Normalizer2Factory::getNFDInstance(*status);
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->fcdPosition = 0;
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(collator->normalizationMode == UCOL_ON) {
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (s)->flags |= UCOL_ITER_NORM;
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) {
128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        (s)->flags |= UCOL_HIRAGANA_Q;
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->iterator = NULL;
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //(s)->iteratorIndex = 0;
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void  U_EXPORT2
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_init_collIterate(const UCollator *collator, const UChar *sourceString,
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             int32_t sourceLen, collIterate *s,
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UErrorCode *status) {
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Out-of-line version for use from other files. */
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(collator, sourceString, sourceLen, s, status);
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI collIterate * U_EXPORT2
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_new_collIterate(UErrorCode *status) {
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    collIterate *s = new collIterate;
14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(s == NULL) {
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *status = U_MEMORY_ALLOCATION_ERROR;
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return s;
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI void U_EXPORT2
15650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_delete_collIterate(collIterate *s) {
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete s;
15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UBool U_EXPORT2
16150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_collIterateAtEnd(collIterate *s) {
16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return s == NULL || s->pos == s->endp;
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Backup the state of the collIterate struct data
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate to backup
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param backup storage
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void backupState(const collIterate *data, collIterateState *backup)
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->fcdPosition = data->fcdPosition;
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->flags       = data->flags;
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->origFlags   = data->origFlags;
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->pos         = data->pos;
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    backup->bufferaddress = data->writableBuffer.getBuffer();
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    backup->buffersize    = data->writableBuffer.length();
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->iteratorMove = 0;
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->iteratorIndex = 0;
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(data->iterator != NULL) {
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER_CURRENT);
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        backup->iteratorIndex = data->iterator->getState(data->iterator);
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // no we try to fixup if we're using a normalizing iterator and we get UITER_NO_STATE
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(backup->iteratorIndex == UITER_NO_STATE) {
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((backup->iteratorIndex = data->iterator->getState(data->iterator)) == UITER_NO_STATE) {
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                backup->iteratorMove++;
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->iterator->move(data->iterator, -1, UITER_CURRENT);
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Loads the state into the collIterate struct data
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate to backup
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param backup storage
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param forwards boolean to indicate if forwards iteration is used,
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*        false indicates backwards iteration
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void loadState(collIterate *data, const collIterateState *backup,
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UBool        forwards)
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags       = backup->flags;
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->origFlags   = backup->origFlags;
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(data->iterator != NULL) {
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO);
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->iterator->setState(data->iterator, backup->iteratorIndex, &status);
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(backup->iteratorMove != 0) {
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->pos         = backup->pos;
217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & UCOL_ITER_INNORMBUF) &&
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->writableBuffer.getBuffer() != backup->bufferaddress) {
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        this is when a new buffer has been reallocated and we'll have to
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        calculate the new position.
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        note the new buffer has to contain the contents of the old buffer.
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (forwards) {
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            data->pos = data->writableBuffer.getTerminatedBuffer() +
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         (data->pos - backup->bufferaddress);
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* backwards direction */
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t temp = backup->buffersize -
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  (int32_t)(data->pos - backup->bufferaddress);
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writableBuffer.length() - temp);
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        this is alittle tricky.
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if we are initially not in the normalization buffer, even if we
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        normalize in the later stage, the data in the buffer will be
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ignored, since we skip back up to the data string.
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        however if we are already in the normalization buffer, any
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        further normalization will pull data into the normalization
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buffer and modify the fcdPosition.
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        since we are keeping the data in the buffer for use, the
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fcdPosition can not be reverted back.
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        arrgghh....
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->fcdPosition = backup->fcdPosition;
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
25350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool
25450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoreallocCEs(collIterate *data, int32_t newCapacity) {
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t *oldCEs = data->extendCEs;
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(oldCEs == NULL) {
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCEs = data->CEs;
25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length = data->CEpos - oldCEs;
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t *newCEs = (uint32_t *)uprv_malloc(newCapacity * 4);
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(newCEs == NULL) {
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uprv_memcpy(newCEs, oldCEs, length * 4);
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uprv_free(data->extendCEs);
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->extendCEs = newCEs;
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->extendCEsSize = newCapacity;
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->CEpos = newCEs + length;
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool
27350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoincreaseCEsCapacity(collIterate *data) {
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t oldCapacity;
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(data->extendCEs != NULL) {
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCapacity = data->extendCEsSize;
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCapacity = LENGTHOF(data->CEs);
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return reallocCEs(data, 2 * oldCapacity);
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoensureCEsCapacity(collIterate *data, int32_t minCapacity) {
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t oldCapacity;
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(data->extendCEs != NULL) {
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCapacity = data->extendCEsSize;
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCapacity = LENGTHOF(data->CEs);
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(minCapacity <= oldCapacity) {
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    oldCapacity *= 2;
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return reallocCEs(data, minCapacity > oldCapacity ? minCapacity : oldCapacity);
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
29827f654740f2a26ad62a5c155af9199af9e69b889clairehovoid collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) {
29927f654740f2a26ad62a5c155af9199af9e69b889claireho    if(U_FAILURE(errorCode)) {
30027f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
30127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
30227f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t length = offsetStore == NULL ? 0 : (int32_t)(offsetStore - offsetBuffer);
30327f654740f2a26ad62a5c155af9199af9e69b889claireho    if(length >= offsetBufferSize) {
30427f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t newCapacity = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE;
30527f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t *newBuffer = reinterpret_cast<int32_t *>(uprv_malloc(newCapacity * 4));
30627f654740f2a26ad62a5c155af9199af9e69b889claireho        if(newBuffer == NULL) {
30727f654740f2a26ad62a5c155af9199af9e69b889claireho            errorCode = U_MEMORY_ALLOCATION_ERROR;
30827f654740f2a26ad62a5c155af9199af9e69b889claireho            return;
30927f654740f2a26ad62a5c155af9199af9e69b889claireho        }
31027f654740f2a26ad62a5c155af9199af9e69b889claireho        if(length > 0) {
31127f654740f2a26ad62a5c155af9199af9e69b889claireho            uprv_memcpy(newBuffer, offsetBuffer, length * 4);
31227f654740f2a26ad62a5c155af9199af9e69b889claireho        }
31327f654740f2a26ad62a5c155af9199af9e69b889claireho        uprv_free(offsetBuffer);
31427f654740f2a26ad62a5c155af9199af9e69b889claireho        offsetBuffer = newBuffer;
31527f654740f2a26ad62a5c155af9199af9e69b889claireho        offsetStore = offsetBuffer + length;
31627f654740f2a26ad62a5c155af9199af9e69b889claireho        offsetBufferSize = newCapacity;
31727f654740f2a26ad62a5c155af9199af9e69b889claireho    }
31827f654740f2a26ad62a5c155af9199af9e69b889claireho    *offsetStore++ = offset;
31927f654740f2a26ad62a5c155af9199af9e69b889claireho}
32027f654740f2a26ad62a5c155af9199af9e69b889claireho
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collIter_eos()
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*     Checks for a collIterate being positioned at the end of
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*     its source string.
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_eos(collIterate *s) {
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s->flags & UCOL_USE_ITERATOR) {
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return !(s->iterator->hasNext(s->iterator));
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((s->flags & UCOL_ITER_HASLEN) == 0 && *s->pos != 0) {
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Null terminated string, but not at null, so not at end.
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   Whether in main or normalization buffer doesn't matter.
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // String with length.  Can't be in normalization buffer, which is always
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  null termintated.
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s->flags & UCOL_ITER_HASLEN) {
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (s->pos == s->endp);
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We are at a null termination, could be either normalization buffer or main string.
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((s->flags & UCOL_ITER_INNORMBUF) == 0) {
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // At null at end of main string.
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return TRUE;
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // At null at end of normalization buffer.  Need to check whether there there are
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   any characters left in the main buffer.
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s->origFlags & UCOL_USE_ITERATOR) {
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return !(s->iterator->hasNext(s->iterator));
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if ((s->origFlags & UCOL_ITER_HASLEN) == 0) {
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Null terminated main string.  fcdPosition is the 'return' position into main buf.
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (*s->fcdPosition == 0);
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Main string with an end pointer.
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return s->fcdPosition == s->endp;
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collIter_bos()
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*     Checks for a collIterate being positioned at the start of
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*     its source string.
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_bos(collIterate *source) {
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // if we're going backwards, we need to know whether there is more in the
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // iterator, even if we are in the side buffer
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) {
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return !source->iterator->hasPrevious(source->iterator);
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if (source->pos <= source->string ||
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      ((source->flags & UCOL_ITER_INNORMBUF) &&
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *(source->pos - 1) == 0 && source->fcdPosition == NULL)) {
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return FALSE;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_SimpleBos(collIterate *source) {
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // if we're going backwards, we need to know whether there is more in the
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // iterator, even if we are in the side buffer
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) {
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return !source->iterator->hasPrevious(source->iterator);
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if (source->pos == source->string) {
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return FALSE;
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //return (data->pos == data->string) ||
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the open/close functions                                   */
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                          */
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UCollator*
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_initFromBinary(const uint8_t *bin, int32_t length,
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UCollator *base,
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UCollator *fillIn,
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UErrorCode *status)
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollator *result = fillIn;
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status)) {
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(base == NULL) {
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // we don't support null base yet
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We need these and we could be running without UCA
423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uprv_uca_initImplicitConstants(status);
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCATableHeader *colData = (UCATableHeader *)bin;
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // do we want version check here? We're trying to figure out whether collators are compatible
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0)) ||
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        colData->version[0] != UCOL_BUILDER_VERSION)
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_COLLATOR_VERSION_MISMATCH;
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = ucol_initCollator((const UCATableHeader *)bin, result, base, status);
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(U_FAILURE(*status)){
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result->hasRealData = TRUE;
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(base) {
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                result = ucol_initCollator(base->image, result, base, status);
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+((const UCATableHeader *)bin)->options), status);
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(U_FAILURE(*status)){
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                result->hasRealData = FALSE;
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else {
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *status = U_USELESS_COLLATOR_ERROR;
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result->freeImageOnClose = FALSE;
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->actualLocale = NULL;
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->validLocale = NULL;
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->requestedLocale = NULL;
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->rules = NULL;
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->rulesLength = 0;
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->freeRulesOnClose = FALSE;
463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->ucaRules = NULL;
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollator* U_EXPORT2
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_openBinary(const uint8_t *bin, int32_t length,
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UCollator *base,
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UErrorCode *status)
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ucol_initFromBinary(bin, length, base, NULL, status);
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruucol_cloneBinary(const UCollator *coll,
477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 uint8_t *buffer, int32_t capacity,
478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 UErrorCode *status)
479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t length = 0;
481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status)) {
482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return length;
483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(capacity < 0) {
485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return length;
487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll->hasRealData == TRUE) {
489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        length = coll->image->size;
490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(length <= capacity) {
491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(buffer, coll->image, length);
492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_BUFFER_OVERFLOW_ERROR;
494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(length <= capacity) {
498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* build the UCATableHeader with minimal entries */
499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* do not copy the header from the UCA file because its values are wrong! */
500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* reset everything */
503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memset(buffer, 0, length);
504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* set the tailoring-specific values */
506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UCATableHeader *myData = (UCATableHeader *)buffer;
507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->size = length;
508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* offset for the options, the only part of the data that is present after the header */
510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->options = sizeof(UCATableHeader);
511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* need to always set the expansion value for an upper bound of the options */
513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->expansion = myData->options + sizeof(UColOptionSet);
514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->magic = UCOL_HEADER_MAGIC;
516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->isBigEndian = U_IS_BIG_ENDIAN;
517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->charSetFamily = U_CHARSET_FAMILY;
518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* copy UCA's version; genrb will override all but the builder version with tailoring data */
520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->jamoSpecial = coll->image->jamoSpecial;
526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* copy the collator options */
528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_BUFFER_OVERFLOW_ERROR;
531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return length;
534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollator* U_EXPORT2
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t * pBufferSize, UErrorCode *status)
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollator * localCollator;
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator);
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *stackBufferChars = (char *)stackBuffer;
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t imageSize = 0;
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t rulesSize = 0;
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t rulesPadding = 0;
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *image;
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *rules;
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool colAllocated = FALSE;
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool imageAllocated = FALSE;
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (status == NULL || U_FAILURE(*status)){
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((stackBuffer && !pBufferSize) || !coll){
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       *status = U_ILLEGAL_ARGUMENT_ERROR;
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (coll->rules && coll->freeRulesOnClose) {
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar);
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar));
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufferSizeNeeded += rulesSize + rulesPadding;
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (stackBuffer && *pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pBufferSize =  bufferSizeNeeded;
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Pointers on 64-bit platforms need to be aligned
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * on a 64-bit boundry in memory.
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (*pBufferSize > offsetUp) {
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pBufferSize -= offsetUp;
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            stackBufferChars += offsetUp;
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pBufferSize = 1;
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    stackBuffer = (void *)stackBufferChars;
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (stackBuffer == NULL || *pBufferSize < bufferSizeNeeded) {
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* allocate one here...*/
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded);
587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Null pointer check.
588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (stackBufferChars == NULL) {
589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        colAllocated = TRUE;
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(*status)) {
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_SAFECLONE_ALLOCATED_WARNING;
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator = (UCollator *)stackBufferChars;
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding);
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode tempStatus = U_ZERO_ERROR;
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus);
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (coll->freeImageOnClose) {
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        image = (uint8_t *)uprv_malloc(imageSize);
605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Null pointer check
606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (image == NULL) {
607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucol_cloneBinary(coll, image, imageSize, status);
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        imageAllocated = TRUE;
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        image = (uint8_t *)coll->image;
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollator, status);
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(*status)) {
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (coll->rules) {
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (coll->freeRulesOnClose) {
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            localCollator->rules = u_strcpy(rules, coll->rules);
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //bufferEnd += rulesSize;
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            localCollator->rules = coll->rules;
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        localCollator->freeRulesOnClose = FALSE;
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        localCollator->rulesLength = coll->rulesLength;
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(coll, (UColAttribute)i, status), status);
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // zero copies of pointers
638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    localCollator->actualLocale = NULL;
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator->validLocale = NULL;
640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    localCollator->requestedLocale = NULL;
641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    localCollator->ucaRules = coll->ucaRules; // There should only be one copy here.
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator->freeOnClose = colAllocated;
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator->freeImageOnClose = imageAllocated;
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return localCollator;
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_close(UCollator *coll)
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(coll != NULL) {
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // these are always owned by each UCollator struct,
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // so we always free them
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->validLocale != NULL) {
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll->validLocale);
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->actualLocale != NULL) {
659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_free(coll->actualLocale);
660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->requestedLocale != NULL) {
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll->requestedLocale);
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->latinOneCEs != NULL) {
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll->latinOneCEs);
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->options != NULL && coll->freeOptionsOnClose) {
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll->options);
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->rules != NULL && coll->freeRulesOnClose) {
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free((UChar *)coll->rules);
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->image != NULL && coll->freeImageOnClose) {
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free((UCATableHeader *)coll->image);
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
67627f654740f2a26ad62a5c155af9199af9e69b889claireho        if(coll->leadBytePermutationTable != NULL) {
67727f654740f2a26ad62a5c155af9199af9e69b889claireho            uprv_free(coll->leadBytePermutationTable);
67827f654740f2a26ad62a5c155af9199af9e69b889claireho        }
67927f654740f2a26ad62a5c155af9199af9e69b889claireho        if(coll->reorderCodes != NULL) {
68027f654740f2a26ad62a5c155af9199af9e69b889claireho            uprv_free(coll->reorderCodes);
68127f654740f2a26ad62a5c155af9199af9e69b889claireho        }
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Here, it would be advisable to close: */
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* - UData for UCA (unless we stuff it in the root resb */
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Again, do we need additional housekeeping... HMMM! */
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose);
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->freeOnClose){
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* for safeClone, if freeOnClose is FALSE,
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            don't free the other instance data */
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll);
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_EXIT();
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This one is currently used by genrb & tests. After constructing from rules (tailoring),*/
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* you should be able to get the binary chunk to write out...  Doesn't look very full now */
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint8_t* U_EXPORT2
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status)
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t *result = NULL;
702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status)) {
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll->hasRealData == TRUE) {
706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *length = coll->image->size;
707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result = (uint8_t *)uprv_malloc(*length);
708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* test for NULL */
709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (result == NULL) {
710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(result, coll->image, *length);
714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result = (uint8_t *)uprv_malloc(*length);
717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* test for NULL */
718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (result == NULL) {
719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* build the UCATableHeader with minimal entries */
724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* do not copy the header from the UCA file because its values are wrong! */
725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* reset everything */
728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memset(result, 0, *length);
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* set the tailoring-specific values */
731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCATableHeader *myData = (UCATableHeader *)result;
732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->size = *length;
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* offset for the options, the only part of the data that is present after the header */
735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->options = sizeof(UCATableHeader);
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* need to always set the expansion value for an upper bound of the options */
738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->expansion = myData->options + sizeof(UColOptionSet);
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->magic = UCOL_HEADER_MAGIC;
741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->isBigEndian = U_IS_BIG_ENDIAN;
742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->charSetFamily = U_CHARSET_FAMILY;
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* copy UCA's version; genrb will override all but the builder version with tailoring data */
745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->jamoSpecial = coll->image->jamoSpecial;
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* copy the collator options */
753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(result+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return result;
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) {
759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status)) {
760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->caseFirst = (UColAttributeValue)opts->caseFirst;
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->caseLevel = (UColAttributeValue)opts->caseLevel;
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->frenchCollation = (UColAttributeValue)opts->frenchCollation;
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->normalizationMode = (UColAttributeValue)opts->normalizationMode;
76627f654740f2a26ad62a5c155af9199af9e69b889claireho    if(result->normalizationMode == UCOL_ON && !initializeFCD(status)) {
76727f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
76827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->strength = (UColAttributeValue)opts->strength;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->variableTopValue = opts->variableTopValue;
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->alternateHandling = (UColAttributeValue)opts->alternateHandling;
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->numericCollation = (UColAttributeValue)opts->numericCollation;
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->caseFirstisDefault = TRUE;
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->caseLevelisDefault = TRUE;
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->frenchCollationisDefault = TRUE;
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->normalizationModeisDefault = TRUE;
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->strengthisDefault = TRUE;
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->variableTopValueisDefault = TRUE;
78027f654740f2a26ad62a5c155af9199af9e69b889claireho    result->alternateHandlingisDefault = TRUE;
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->hiraganaQisDefault = TRUE;
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->numericCollationisDefault = TRUE;
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucol_updateInternalState(result, status);
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->options = opts;
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Approximate determination if a character is at a contraction end.
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Guaranteed to be TRUE if a character is at the end of a contraction,
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* otherwise it is not deterministic.
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param c character to be determined
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll collator
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c < coll->minContrEndCP) {
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t  hash = c;
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t  htbyte;
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (U16_IS_TRAIL(c)) {
807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return TRUE;
808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    htbyte = coll->contrEndCP[hash>>3];
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (((htbyte >> (hash & 7)) & 1) == 1);
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   i_getCombiningClass()
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*        A fast, at least partly inline version of u_getCombiningClass()
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*        This is a candidate for further optimization.  Used heavily
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*        in contraction processing.
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) {
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t sCC = 0;
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) {
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sCC = u_getCombiningClass(c);
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return sCC;
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status) {
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar c;
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollator *result = fillIn;
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status) || image == NULL) {
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(result == NULL) {
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = (UCollator *)uprv_malloc(sizeof(UCollator));
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(result == NULL) {
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return result;
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result->freeOnClose = TRUE;
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result->freeOnClose = FALSE;
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->image = image;
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->mapping.getFoldingOffset = _getFoldingOffset;
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosition;
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status)) {
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(result->freeOnClose == TRUE) {
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(result);
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = NULL;
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return result;
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping);
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->contractionCEs);
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex);
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion);
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->rules = NULL;
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->rulesLength = 0;
868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->freeRulesOnClose = FALSE;
86927f654740f2a26ad62a5c155af9199af9e69b889claireho    result->reorderCodes = NULL;
87027f654740f2a26ad62a5c155af9199af9e69b889claireho    result->reorderCodesLength = 0;
87127f654740f2a26ad62a5c155af9199af9e69b889claireho    result->leadBytePermutationTable = NULL;
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* get the version info from UCATableHeader and populate the Collator struct*/
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->dataVersion[2] = 0;
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->dataVersion[3] = 0;
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->minUnsafeCP = 0;
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (c=0; c<0x300; c++) {  // Find the smallest unsafe char.
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ucol_unsafeCP(c, result)) break;
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->minUnsafeCP = c;
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP;
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->minContrEndCP = 0;
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (c=0; c<0x300; c++) {  // Find the Contraction-ending char.
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ucol_contractionEndCP(c, result)) break;
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->minContrEndCP = c;
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* max expansion tables */
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->endExpansionCE = (uint32_t*)((uint8_t*)result->image +
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         result->image->endExpansionCE);
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->lastEndExpansionCE = result->endExpansionCE +
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 result->image->endExpansionCECount - 1;
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->expansionCESize = (uint8_t*)result->image +
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               result->image->expansionCESize;
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //result->errorCode = *status;
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->latinOneCEs = NULL;
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->latinOneRegenTable = FALSE;
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->latinOneFailed = FALSE;
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->UCA = UCA;
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Normally these will be set correctly later. This is the default if you use UCA or the default. */
911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->ucaRules = NULL;
912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->actualLocale = NULL;
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->validLocale = NULL;
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->requestedLocale = NULL;
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->hasRealData = FALSE; // real data lives in .dat file...
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->freeImageOnClose = FALSE;
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
91827f654740f2a26ad62a5c155af9199af9e69b889claireho    /* set attributes */
91927f654740f2a26ad62a5c155af9199af9e69b889claireho    ucol_setOptionsFromHeader(
92027f654740f2a26ad62a5c155af9199af9e69b889claireho        result,
92127f654740f2a26ad62a5c155af9199af9e69b889claireho        (UColOptionSet*)((uint8_t*)result->image+result->image->options),
92227f654740f2a26ad62a5c155af9199af9e69b889claireho        status);
92327f654740f2a26ad62a5c155af9199af9e69b889claireho    result->freeOptionsOnClose = FALSE;
92427f654740f2a26ad62a5c155af9199af9e69b889claireho
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* new Mark's code */
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For generation of Implicit CEs
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @author Davis
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Cleaned up so that changes can be made more easily.
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Old values:
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# First Implicit: E26A792D
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Last Implicit: E3DC70C0
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# First CJK: E0030300
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Last CJK: E0A9DD00
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# First CJK_A: E0A9DF00
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Last CJK_A: E0DE3100
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
/* Following is a port of Mark's code for new treatment of implicits.
 * It is positioned here, since ucol_initUCA needs to initialize the
 * variables below according to the data in the fractional UCA.
 */
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Function used to:
950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * b) bump any non-CJK characters by 10FFFF.
952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The relevant blocks are:
953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * A:    4E00..9FFF; CJK Unified Ideographs
954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *       F900..FAFF; CJK Compatibility Ideographs
955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * B:    3400..4DBF; CJK Unified Ideographs Extension A
956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * As long as
958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *   no new B characters are allocated between 4E00 and FAFF, and
959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *   no new A characters are outside of this range,
960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * (very high probability) this simple code will work.
961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The reordered blocks are:
962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Block1 is CJK
963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Block2 is CJK_COMPAT_USED
964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Block3 is CJK_A
965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * (all contiguous)
966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Any other CJK gets its normal code point
967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Any non-CJK gets +10FFFF
968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * When we reorder Block1, we make sure that it is at the very start,
969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * so that it will use a 3-byte form.
 * Warning: we only pick up the compatibility characters that are
 * NOT decomposed, so that block is smaller!
972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// CONSTANTS
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    NON_CJK_OFFSET = 0x110000,
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Working values for the implicit-CE computation.
 * All start out as 0; precomputed by initImplicitConstants().
 */
static int32_t final3Multiplier = 0;
static int32_t final4Multiplier = 0;
static int32_t final3Count = 0;
static int32_t final4Count = 0;
static int32_t medialCount = 0;
static int32_t min3Primary = 0;
static int32_t min4Primary = 0;
static int32_t max4Primary = 0;
static int32_t minTrail = 0;
static int32_t maxTrail = 0;
static int32_t max3Trail = 0;
static int32_t max4Trail = 0;
static int32_t min4Boundary = 0;
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32
99827f654740f2a26ad62a5c155af9199af9e69b889claireho    // 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
99927f654740f2a26ad62a5c155af9199af9e69b889claireho    // 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_BASE = 0x4E00,
100127f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_LIMIT = 0x9FCB+1,
100227f654740f2a26ad62a5c155af9199af9e69b889claireho    // Unified CJK ideographs in the compatibility ideographs block.
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_COMPAT_USED_BASE = 0xFA0E,
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
100527f654740f2a26ad62a5c155af9199af9e69b889claireho    // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
100627f654740f2a26ad62a5c155af9199af9e69b889claireho    // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_A_BASE = 0x3400,
100827f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_A_LIMIT = 0x4DB5+1,
100927f654740f2a26ad62a5c155af9199af9e69b889claireho    // 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
101027f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_B_BASE = 0x20000,
101227f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_B_LIMIT = 0x2A6D6+1,
101327f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;
101427f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;
101527f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_C_BASE = 0x2A700,
101627f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_C_LIMIT = 0x2B734+1,
101727f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;
101827f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;
101927f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_D_BASE = 0x2B740,
102027f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_D_LIMIT = 0x2B81D+1;
102127f654740f2a26ad62a5c155af9199af9e69b889claireho    // when adding to this list, look for all occurrences (in project)
102227f654740f2a26ad62a5c155af9199af9e69b889claireho    // of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing!!!!
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 swapCJK(UChar32 i) {
102527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (i < CJK_A_BASE) {
102627f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
102727f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_A_LIMIT) {
102827f654740f2a26ad62a5c155af9199af9e69b889claireho        // Extension A has lower code points than the original Unihan+compat
102927f654740f2a26ad62a5c155af9199af9e69b889claireho        // but sorts higher.
103027f654740f2a26ad62a5c155af9199af9e69b889claireho        return i - CJK_A_BASE
103127f654740f2a26ad62a5c155af9199af9e69b889claireho                + (CJK_LIMIT - CJK_BASE)
103227f654740f2a26ad62a5c155af9199af9e69b889claireho                + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
103327f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_BASE) {
103427f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
103527f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_LIMIT) {
103627f654740f2a26ad62a5c155af9199af9e69b889claireho        return i - CJK_BASE;
103727f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_COMPAT_USED_BASE) {
103827f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
103927f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_COMPAT_USED_LIMIT) {
104027f654740f2a26ad62a5c155af9199af9e69b889claireho        return i - CJK_COMPAT_USED_BASE
104127f654740f2a26ad62a5c155af9199af9e69b889claireho                + (CJK_LIMIT - CJK_BASE);
104227f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_B_BASE) {
104327f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
104427f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_B_LIMIT) {
104527f654740f2a26ad62a5c155af9199af9e69b889claireho        return i; // non-BMP-CJK
104627f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_C_BASE) {
104727f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
104827f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_C_LIMIT) {
104927f654740f2a26ad62a5c155af9199af9e69b889claireho        return i; // non-BMP-CJK
105027f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_D_BASE) {
105127f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
105227f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_D_LIMIT) {
105327f654740f2a26ad62a5c155af9199af9e69b889claireho        return i; // non-BMP-CJK
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return i + NON_CJK_OFFSET; // non-CJK
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getRawFromCodePoint(UChar32 i) {
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return swapCJK(i)+1;
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getCodePointFromRaw(UChar32 i) {
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i--;
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 result = 0;
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(i >= NON_CJK_OFFSET) {
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = i - NON_CJK_OFFSET;
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(i >= CJK_B_BASE) {
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = i;
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(i < CJK_LIMIT - CJK_BASE) {
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = i + CJK_BASE;
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = -1;
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// GET IMPLICIT PRIMARY WEIGHTS
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Return value is left justified primary key
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getImplicitFromRaw(UChar32 cp) {
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0 || cp > UCOL_MAX_INPUT) {
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t last0 = cp - min4Boundary;
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (last0 < 0) {
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last1 = cp / final3Count;
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last0 = cp % final3Count;
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last2 = last1 / medialCount;
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last1 %= medialCount;
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last1 = minTrail + last1; // offset
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last2 = min3Primary + last2; // offset
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (last2 >= min4Primary) {
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2));
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (last2 << 24) + (last1 << 16) + (last0 << 8);
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last1 = last0 / final4Count;
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last0 %= final4Count;
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last2 = last1 / medialCount;
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last1 %= medialCount;
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last3 = last2 / medialCount;
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last2 %= medialCount;
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last1 = minTrail + last1; // offset
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last2 = minTrail + last2; // offset
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last3 = min4Primary + last3; // offset
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (last3 > max4Primary) {
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3));
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t U_EXPORT2
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getImplicitPrimary(UChar32 cp) {
113627f654740f2a26ad62a5c155af9199af9e69b889claireho   //fprintf(stdout, "Incoming: %04x\n", cp);
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    cp = swapCJK(cp);
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    cp++;
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // we now have a range of numbers from 0 to 21FFFF.
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
114427f654740f2a26ad62a5c155af9199af9e69b889claireho    //fprintf(stdout, "CJK swapped: %04x\n", cp);
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return uprv_uca_getImplicitFromRaw(cp);
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Converts implicit CE into raw integer ("code point")
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param implicit
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return -1 if illegal format
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getRawFromImplicit(uint32_t implicit) {
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 result;
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 b3 = implicit & 0xFF;
1158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 b2 = (implicit >> 8) & 0xFF;
1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 b1 = (implicit >> 16) & 0xFF;
1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 b0 = (implicit >> 24) & 0xFF;
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // simple parameter checks
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (b0 < min3Primary || b0 > max4Primary
1164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || b1 < minTrail || b1 > maxTrail)
1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return -1;
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // normal offsets
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    b1 -= minTrail;
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // take care of the final values, and compose
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (b0 < min4Primary) {
1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (b2 < minTrail || b2 > max3Trail || b3 != 0)
1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return -1;
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b2 -= minTrail;
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 remainder = b2 % final3Multiplier;
1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (remainder != 0)
1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return -1;
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b0 -= min3Primary;
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b2 /= final3Multiplier;
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = ((b0 * medialCount) + b1) * final3Count + b2;
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (b2 < minTrail || b2 > maxTrail
1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            || b3 < minTrail || b3 > max4Trail)
1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return -1;
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b2 -= minTrail;
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b3 -= minTrail;
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 remainder = b3 % final4Multiplier;
1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (remainder != 0)
1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return -1;
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b3 /= final4Multiplier;
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b0 -= min4Primary;
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // final check
1194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (result < 0 || result > UCOL_MAX_INPUT)
1195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return -1;
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Integer division of a by b, rounded up, for positive a and b.
 * Note that for a == 0 the result is 1, not 0, because (a-1)/b truncates
 * toward zero.
 */
static inline int32_t divideAndRoundUp(int a, int b) {
    const int quotientBelow = (a - 1) / b;
    return quotientBelow + 1;
}
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this function is either called from initUCA or from genUCA before
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * doing canonical closure for the UCA.
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set up to generate implicits.
1210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Maintenance Note:  this function may end up being called more than once, due
1211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                    to threading races during initialization.  Make sure that
1212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                    none of the Constants is ever transiently assigned an
1213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                    incorrect value.
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param minPrimary
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param maxPrimary
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param minTrail final byte
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param maxTrail final byte
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param gap3 the gap we leave for tailoring for 3-byte forms
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param gap4 the gap we leave for tailoring for 4-byte forms
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void initImplicitConstants(int minPrimary, int maxPrimary,
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    int minTrailIn, int maxTrailIn,
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    int gap3, int primaries3count,
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    UErrorCode *status) {
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // some simple parameter checks
1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ((minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF)
1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF)
1228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || (primaries3count < 1))
1229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    minTrail = minTrailIn;
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    maxTrail = maxTrailIn;
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    min3Primary = minPrimary;
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    max4Primary = maxPrimary;
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // compute constants for use later.
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // number of values we can use in trailing bytes
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // leave room for empty values between AND above, e.g. if gap = 2
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // range 3..7 => +3 -4 -5 -6 -7: so 1 value
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    final3Multiplier = gap3 + 1;
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // medials can use full range
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    medialCount = (maxTrail - minTrail + 1);
1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // find out how many values fit in each form
1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t threeByteCount = medialCount * final3Count;
1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // now determine where the 3/4 boundary is.
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // we use 3 bytes below the boundary, and 4 above
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t primariesAvailable = maxPrimary - minPrimary + 1;
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t primaries4count = primariesAvailable - primaries3count;
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t min3ByteCoverage = primaries3count * threeByteCount;
1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    min4Primary = minPrimary + primaries3count;
1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    min4Boundary = min3ByteCoverage;
1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Now expand out the multiplier for the 4 bytes, and redo.
1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary;
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (gap4 < 1) {
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    final4Multiplier = gap4 + 1;
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    final4Count = neededPerFinalByte;
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Supply parameters for generating implicit CEs
1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruuprv_uca_initImplicitConstants(UErrorCode *status) {
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
1284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status);
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*    collIterNormalize     Incremental Normalization happens here.                       */
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                          pick up the range of chars identifed by FCD,                  */
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                          normalize it into the collIterate's writable buffer,          */
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                          switch the collIterate's state to use the writable buffer.    */
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                                        */
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid collIterNormalize(collIterate *collationSource)
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status = U_ZERO_ERROR;
129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *srcP = collationSource->pos - 1;      /*  Start of chars to normalize    */
129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *endP = collationSource->fcdPosition;  /* End of region to normalize+1    */
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - srcP)),
130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    collationSource->writableBuffer,
130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    status);
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG
130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_errorName(status));
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    collationSource->pos        = collationSource->writableBuffer.getTerminatedBuffer();
1311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->origFlags  = collationSource->flags;
1312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->flags     |= UCOL_ITER_INNORMBUF;
1313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This function takes the iterator and extracts normalized stuff up to the next boundary
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// It is similar in the end results to the collIterNormalize, but for the cases when we
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// use an iterator
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizeIterator(collIterate *collationSource) {
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool wasNormalized = FALSE;
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->iterator, UITER_CURRENT);
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t iterIndex = collationSource->iterator->getState(collationSource->iterator);
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(status == U_BUFFER_OVERFLOW_ERROR || normLen == (int32_t)collationSource->writableBufSize) {
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // reallocate and terminate
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!u_growBufferFromStatic(collationSource->stackWritableBuffer,
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               &collationSource->writableBuffer,
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               (int32_t *)&collationSource->writableBufSize, normLen + 1,
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               0)
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #ifdef UCOL_DEBUG
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "normalizeIterator(), out of memory\n");
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #endif
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //collationSource->iterator->move(collationSource->iterator, iterIndex, UITER_ZERO);
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collationSource->iterator->setState(collationSource->iterator, iterIndex, &status);
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Terminate the buffer - we already checked that it is big enough
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->writableBuffer[normLen] = 0;
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      collationSource->flags |= UCOL_ITER_ALLOCATED;
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->pos        = collationSource->writableBuffer;
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->origFlags  = collationSource->flags;
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->flags     |= UCOL_ITER_INNORMBUF;
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Incremental FCD check and normalize                                                    */
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*   Called from getNextCE when normalization state is suspect.                           */
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*   When entering, the state is known to be this:                                        */
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*      o   We are working in the main buffer of the collIterate, not the side            */
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          writable buffer.  When in the side buffer, normalization mode is always off,  */
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          so we won't get here.                                                         */
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*      o   The leading combining class from the current character is 0 or                */
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          the trailing combining class of the previous char was zero.                   */
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          True because the previous call to this function will have always exited       */
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          that way, and we get called for every char where cc might be non-zero.        */
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIterFCD(collIterate *collationSource) {
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *srcP, *endP;
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t     leadingCC;
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t     prevTrailingCC = 0;
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t    fcd;
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool       needNormalize = FALSE;
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    srcP = collationSource->pos-1;
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (collationSource->flags & UCOL_ITER_HASLEN) {
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        endP = collationSource->endp;
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        endP = NULL;
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Get the trailing combining class of the current character.  If it's zero,
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   we are OK.
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* trie access */
1387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fcd != 0) {
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (prevTrailingCC != 0) {
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // The current char has a non-zero trailing CC.  Scan forward until we find
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   a char with a leading cc of zero.
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while (endP == NULL || srcP != endP)
1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UChar *savedSrcP = srcP;
1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* trie access */
1399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);
1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (leadingCC == 0) {
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    srcP = savedSrcP;      // Hit char that is not part of combining sequence.
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           //   back up over it.  (Could be surrogate pair!)
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (leadingCC < prevTrailingCC) {
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    needNormalize = TRUE;
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collationSource->fcdPosition = (UChar *)srcP;
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return needNormalize;
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/****************************************************************************/
/* Following are the CE retrieval functions                                 */
/*                                                                          */
/****************************************************************************/
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t getImplicit(UChar32 cp, collIterate *collationSource);
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource);
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there should be a macro version of this function in the header file */
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the first function that tries to fetch a collation element  */
1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* If it's not succesfull or it encounters a more difficult situation  */
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* some more sofisticated and slower functions are invoked             */
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t order = 0;
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (collationSource->CEpos > collationSource->toReturn) {       /* Are there any CEs from previous expansions? */
1437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        order = *(collationSource->toReturn++);                         /* if so, return them */
1438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(collationSource->CEpos == collationSource->toReturn) {
1439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            collationSource->CEpos = collationSource->toReturn = collationSource->extendCEs ? collationSource->extendCEs : collationSource->CEs;
1440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return order;
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar ch = 0;
1445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->offsetReturn = NULL;
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;)                           /* Loop handles case when incremental normalize switches   */
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {                                  /*   to or from the side buffer / original string, and we  */
1449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*   need to start again to get the next character.        */
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // The source string is null terminated and we're not working from the side buffer,
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   and we're not normalizing.  This is the fast path.
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *collationSource->pos++;
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (ch != 0) {
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else {
1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return UCOL_NO_MORE_CES;
1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (collationSource->flags & UCOL_ITER_HASLEN) {
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Normal path for strings when length is specified.
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   (We can't be in side buffer because it is always null terminated.)
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (collationSource->pos >= collationSource->endp) {
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Ran off of the end of the main source string.  We're done.
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return UCOL_NO_MORE_CES;
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *collationSource->pos++;
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else if(collationSource->flags & UCOL_USE_ITERATOR) {
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(iterCh == U_SENTINEL) {
1477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return UCOL_NO_MORE_CES;
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = (UChar)iterCh;
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else
1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Null terminated string.
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *collationSource->pos++;
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (ch == 0) {
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Ran off end of buffer.
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Ran off end of main string. backing up one character.
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    collationSource->pos--;
1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return UCOL_NO_MORE_CES;
1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                else
1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                {
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Hit null in the normalize side buffer.
1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Usually this means the end of the normalized data,
1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // except for one odd case: a null followed by combining chars,
1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   which is the case if we are at the start of the buffer.
149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {
1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  Null marked end of side buffer.
1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   Revert to the main string and
1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   loop back to top to try again to get a character.
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    collationSource->pos   = collationSource->fcdPosition;
1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    collationSource->flags = collationSource->origFlags;
1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(collationSource->flags&UCOL_HIRAGANA_Q) {
1513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
1514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru             * based on whether the previous codepoint was Hiragana or Katakana.
1515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru             */
1516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||
1517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
1518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collationSource->flags |= UCOL_WAS_HIRAGANA;
1519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
1520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collationSource->flags &= ~UCOL_WAS_HIRAGANA;
1521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // We've got a character.  See if there's any fcd and/or normalization stuff to do.
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.
1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (collationSource->fcdPosition >= collationSource->pos) {
1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // An earlier FCD check has already covered the current character.
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We can go ahead and process this char.
1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ch < ZERO_CC_LIMIT_ ) {
1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Fast fcd safe path.  Trailing combining class == 0.  This char is OK.
1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We need to peek at the next character in order to tell if we are FCD
1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // We are at the last char of source string.
1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  It is always OK for FCD check.
1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Not at last char of source string (or we'll check against terminating null).  Do the FCD fast test
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Need a more complete FCD check and possible normalization.
1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (collIterFCD(collationSource)) {
1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            collIterNormalize(collationSource);
1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //  No normalization was needed.  Go ahead and process the char we already had.
1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Some normalization happened.  Next loop iteration will pick up a char
1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   from the normalization buffer.
1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }   // end for (;;)
1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (ch <= 0xFF) {
1572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*  For latin-1 characters we never need to fall back to the UCA table        */
1573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*    because all of the UCA data is replicated in the latinOneMapping array  */
1574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        order = coll->latinOneMapping[ch];
1575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (order > UCOL_NOT_FOUND) {
1576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
1577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    else
1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // Always use UCA for Han, Hangul
1582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // (Han extension A is before main Han block)
1583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // **** Han compatibility chars ?? ****
1584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
1585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
1586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
1587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                // between the two target ranges; do normal lookup
1588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                // **** this range is YI, Modifier tone letters, ****
1589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
1590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                // **** Latin-D might be tailored, so we need to ****
1591b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                // **** do the normal lookup for these guys.     ****
1592b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
1593b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else {
1594b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                // in one of the target ranges; use UCA
1595b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                order = UCOL_NOT_FOUND;
1596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
1597b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else {
1598b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
1599b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        }
1600b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
1602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);    /* and try to get the special CE     */
1603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1604b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a good CE in the tailoring */
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
1610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(order == UCOL_NOT_FOUND) {
1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        order = getImplicit(ch, collationSource);
1616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return order; /* return the CE */
1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_getNextCE, out-of-line version for use from other files.   */
1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t  U_EXPORT2
1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ucol_IGetNextCE(coll, collationSource, status);
1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
* Incremental previous normalization happens here. Pick up the range of chars
* identified by FCD, normalize it into the collIterate's writable buffer,
* switch the collIterate's state to use the writable buffer.
* @param data collation iterator data
*/
1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid collPrevIterNormalize(collIterate *data)
1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status  = U_ZERO_ERROR;
163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pEnd   = data->pos;  /* End normalize + 1 */
163850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pStart;
1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Start normalize */
1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->fcdPosition == NULL) {
1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pStart = data->string;
1643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pStart = data->fcdPosition + 1;
1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t normLen =
164950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pStart) + 1)),
165050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             data->writableBuffer,
165150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             status).
165250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length();
165350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(status)) {
165450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    this puts the null termination infront of the normalized string instead
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    of the end
1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
166050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->writableBuffer.insert(0, (UChar)0);
1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /*
1663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * The usual case at this point is that we've got a base
1664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * character followed by marks that were normalized. If
1665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * fcdPosition is NULL, that means that we backed up to
1666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * the beginning of the string and there's no base character.
1667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *
1668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * Forward processing will usually normalize when it sees
1669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * the first mark, so that mark will get it's natural offset
1670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * and the rest will get the offset of the character following
1671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * the marks. The base character will also get its natural offset.
1672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *
1673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * We write the offset of the base character, if there is one,
1674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * followed by the offset of the first mark and then the offsets
1675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * of the rest of the marks.
1676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     */
1677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t firstMarkOffset = 0;
167850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t trailOffset     = (int32_t)(data->pos - data->string + 1);
1679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t trailCount      = normLen - 1;
1680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (data->fcdPosition != NULL) {
168250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t baseOffset = (int32_t)(data->fcdPosition - data->string);
1683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UChar   baseChar   = *data->fcdPosition;
1684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        firstMarkOffset = baseOffset + 1;
1686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*
168850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * If the base character is the start of a contraction, forward processing
168950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * will normalize the marks while checking for the contraction, which means
169050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * that the offset of the first mark will the same as the other marks.
169150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
169250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * **** THIS IS PROBABLY NOT A COMPLETE TEST ****
169350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
169450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (baseChar >= 0x100) {
169550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, baseChar);
169650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
169750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) {
169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, baseChar);
169950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION_TAG) {
170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                firstMarkOffset = trailOffset;
170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
170627f654740f2a26ad62a5c155af9199af9e69b889claireho        data->appendOffset(baseOffset, status);
1707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
170927f654740f2a26ad62a5c155af9199af9e69b889claireho    data->appendOffset(firstMarkOffset, status);
1710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for (int32_t i = 0; i < trailCount; i += 1) {
171227f654740f2a26ad62a5c155af9199af9e69b889claireho        data->appendOffset(trailOffset, status);
1713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    data->offsetRepeatValue = trailOffset;
1716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    data->offsetReturn = data->offsetStore - 1;
1718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (data->offsetReturn == data->offsetBuffer) {
1719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->offsetStore = data->offsetBuffer;
1720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
172250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->pos        = data->writableBuffer.getTerminatedBuffer() + 1 + normLen;
1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->origFlags  = data->flags;
1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     |= UCOL_ITER_INNORMBUF;
1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Incremental FCD check for previous iteration and normalize. Called from
1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* getPrevCE when normalization state is suspect.
1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* When entering, the state is known to be this:
1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* o  We are working in the main buffer of the collIterate, not the side
1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    writable buffer. When in the side buffer, normalization mode is always
1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    off, so we won't get here.
1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* o  The leading combining class from the current character is 0 or the
1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    trailing combining class of the previous char was zero.
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    True because the previous call to this function will have always exited
1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    that way, and we get called for every char where cc might be non-zero.
1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterate struct
1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return normalization status, TRUE for normalization to be done, FALSE
1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*         otherwise
1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collPrevIterFCD(collIterate *data)
1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *src, *start;
1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t     leadingCC;
1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t     trailingCC = 0;
1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t    fcd;
1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool       result = FALSE;
1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    start = data->string;
1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    src = data->pos + 1;
1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Get the trailing combining class of the current character. */
1757b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);
1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (leadingCC != 0) {
1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        The current char has a non-zero leading combining class.
1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        Scan backward until we find a char with a trailing cc of zero.
1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (;;)
1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (start == src) {
1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->fcdPosition = NULL;
1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return result;
1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1773b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);
1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (trailingCC == 0) {
1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (leadingCC < trailingCC) {
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                result = TRUE;
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->fcdPosition = (UChar *)src;
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
179427f654740f2a26ad62a5c155af9199af9e69b889claireho/** gets a code unit from the string at a given offset
1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  Handles both normal and iterative cases.
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  No error checking - caller beware!
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
179827f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline
179927f654740f2a26ad62a5c155af9199af9e69b889clairehoUChar peekCodeUnit(collIterate *source, int32_t offset) {
1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(source->pos != NULL) {
1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return *(source->pos + offset);
1802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else if(source->iterator != NULL) {
180327f654740f2a26ad62a5c155af9199af9e69b889claireho        UChar32 c;
1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(offset != 0) {
1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            source->iterator->move(source->iterator, offset, UITER_CURRENT);
180627f654740f2a26ad62a5c155af9199af9e69b889claireho            c = source->iterator->next(source->iterator);
1807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            source->iterator->move(source->iterator, -offset-1, UITER_CURRENT);
1808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
180927f654740f2a26ad62a5c155af9199af9e69b889claireho            c = source->iterator->current(source->iterator);
1810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
181127f654740f2a26ad62a5c155af9199af9e69b889claireho        return c >= 0 ? (UChar)c : 0xfffd;  // If the caller works properly, we should never see c<0.
1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
181327f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0xfffd;
1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
181727f654740f2a26ad62a5c155af9199af9e69b889claireho// Code point version. Treats the offset as a _code point_ delta.
181827f654740f2a26ad62a5c155af9199af9e69b889claireho// We cannot use U16_FWD_1_UNSAFE and similar because we might not have well-formed UTF-16.
181927f654740f2a26ad62a5c155af9199af9e69b889claireho// We cannot use U16_FWD_1 and similar because we do not know the start and limit of the buffer.
182027f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline
182127f654740f2a26ad62a5c155af9199af9e69b889clairehoUChar32 peekCodePoint(collIterate *source, int32_t offset) {
182227f654740f2a26ad62a5c155af9199af9e69b889claireho    UChar32 c;
182327f654740f2a26ad62a5c155af9199af9e69b889claireho    if(source->pos != NULL) {
182427f654740f2a26ad62a5c155af9199af9e69b889claireho        const UChar *p = source->pos;
182527f654740f2a26ad62a5c155af9199af9e69b889claireho        if(offset >= 0) {
182627f654740f2a26ad62a5c155af9199af9e69b889claireho            // Skip forward over (offset-1) code points.
182727f654740f2a26ad62a5c155af9199af9e69b889claireho            while(--offset >= 0) {
182827f654740f2a26ad62a5c155af9199af9e69b889claireho                if(U16_IS_LEAD(*p++) && U16_IS_TRAIL(*p)) {
182927f654740f2a26ad62a5c155af9199af9e69b889claireho                    ++p;
183027f654740f2a26ad62a5c155af9199af9e69b889claireho                }
183127f654740f2a26ad62a5c155af9199af9e69b889claireho            }
183227f654740f2a26ad62a5c155af9199af9e69b889claireho            // Read the code point there.
183327f654740f2a26ad62a5c155af9199af9e69b889claireho            c = *p++;
183427f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar trail;
183527f654740f2a26ad62a5c155af9199af9e69b889claireho            if(U16_IS_LEAD(c) && U16_IS_TRAIL(trail = *p)) {
183627f654740f2a26ad62a5c155af9199af9e69b889claireho                c = U16_GET_SUPPLEMENTARY(c, trail);
183727f654740f2a26ad62a5c155af9199af9e69b889claireho            }
183827f654740f2a26ad62a5c155af9199af9e69b889claireho        } else /* offset<0 */ {
183927f654740f2a26ad62a5c155af9199af9e69b889claireho            // Skip backward over (offset-1) code points.
184027f654740f2a26ad62a5c155af9199af9e69b889claireho            while(++offset < 0) {
184127f654740f2a26ad62a5c155af9199af9e69b889claireho                if(U16_IS_TRAIL(*--p) && U16_IS_LEAD(*(p - 1))) {
184227f654740f2a26ad62a5c155af9199af9e69b889claireho                    --p;
184327f654740f2a26ad62a5c155af9199af9e69b889claireho                }
184427f654740f2a26ad62a5c155af9199af9e69b889claireho            }
184527f654740f2a26ad62a5c155af9199af9e69b889claireho            // Read the code point before that.
184627f654740f2a26ad62a5c155af9199af9e69b889claireho            c = *--p;
184727f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar lead;
184827f654740f2a26ad62a5c155af9199af9e69b889claireho            if(U16_IS_TRAIL(c) && U16_IS_LEAD(lead = *(p - 1))) {
184927f654740f2a26ad62a5c155af9199af9e69b889claireho                c = U16_GET_SUPPLEMENTARY(lead, c);
185027f654740f2a26ad62a5c155af9199af9e69b889claireho            }
185127f654740f2a26ad62a5c155af9199af9e69b889claireho        }
185227f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if(source->iterator != NULL) {
185327f654740f2a26ad62a5c155af9199af9e69b889claireho        if(offset >= 0) {
185427f654740f2a26ad62a5c155af9199af9e69b889claireho            // Skip forward over (offset-1) code points.
185527f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t fwd = offset;
185627f654740f2a26ad62a5c155af9199af9e69b889claireho            while(fwd-- > 0) {
185727f654740f2a26ad62a5c155af9199af9e69b889claireho                uiter_next32(source->iterator);
185827f654740f2a26ad62a5c155af9199af9e69b889claireho            }
185927f654740f2a26ad62a5c155af9199af9e69b889claireho            // Read the code point there.
186027f654740f2a26ad62a5c155af9199af9e69b889claireho            c = uiter_current32(source->iterator);
186127f654740f2a26ad62a5c155af9199af9e69b889claireho            // Return to the starting point, skipping backward over (offset-1) code points.
186227f654740f2a26ad62a5c155af9199af9e69b889claireho            while(offset-- > 0) {
186327f654740f2a26ad62a5c155af9199af9e69b889claireho                uiter_previous32(source->iterator);
186427f654740f2a26ad62a5c155af9199af9e69b889claireho            }
186527f654740f2a26ad62a5c155af9199af9e69b889claireho        } else /* offset<0 */ {
186627f654740f2a26ad62a5c155af9199af9e69b889claireho            // Read backward, reading offset code points, remember only the last-read one.
186727f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t back = offset;
186827f654740f2a26ad62a5c155af9199af9e69b889claireho            do {
186927f654740f2a26ad62a5c155af9199af9e69b889claireho                c = uiter_previous32(source->iterator);
187027f654740f2a26ad62a5c155af9199af9e69b889claireho            } while(++back < 0);
187127f654740f2a26ad62a5c155af9199af9e69b889claireho            // Return to the starting position, skipping forward over offset code points.
187227f654740f2a26ad62a5c155af9199af9e69b889claireho            do {
187327f654740f2a26ad62a5c155af9199af9e69b889claireho                uiter_next32(source->iterator);
187427f654740f2a26ad62a5c155af9199af9e69b889claireho            } while(++offset < 0);
187527f654740f2a26ad62a5c155af9199af9e69b889claireho        }
187627f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
187727f654740f2a26ad62a5c155af9199af9e69b889claireho        c = U_SENTINEL;
187827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
187927f654740f2a26ad62a5c155af9199af9e69b889claireho    return c;
188027f654740f2a26ad62a5c155af9199af9e69b889claireho}
188127f654740f2a26ad62a5c155af9199af9e69b889claireho
1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Determines if we are at the start of the data string in the backwards
1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collation iterator
1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator
1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return TRUE if we are at the start
1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool isAtStartPrevIterate(collIterate *data) {
1890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->pos == NULL && data->iterator != NULL) {
1891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return !data->iterator->hasPrevious(data->iterator);
1892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //return (collIter_bos(data)) ||
1894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (data->pos == data->string) ||
1895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru              ((data->flags & UCOL_ITER_INNORMBUF) &&
1896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru              *(data->pos - 1) == 0 && data->fcdPosition == NULL);
1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void goBackOne(collIterate *data) {
1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# if 0
1902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // somehow, it looks like we need to keep iterator synced up
1903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // at all times, as above.
1904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->pos) {
1905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->pos--;
1906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->iterator) {
1908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->iterator->previous(data->iterator);
1909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {
1912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->iterator->previous(data->iterator);
1913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->pos) {
1915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->pos --;
1916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inline function that gets a simple CE.
1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* So what it does is that it will first check the expansion buffer. If the
1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* expansion buffer is not empty, ie the end pointer to the expansion buffer
1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* is different from the string pointer, we return the collation element at the
1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* return pointer and decrement it.
1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* For more complicated CEs it resorts to getComplicatedCE.
1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll collator data
1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator struct
1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param status error status
1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
1930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               UErrorCode *status)
1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t result = (uint32_t)UCOL_NULLORDER;
1935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (data->offsetReturn != NULL) {
1937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (data->offsetRepeatCount > 0) {
1938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                data->offsetRepeatCount -= 1;
1939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
1940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (data->offsetReturn == data->offsetBuffer) {
1941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                data->offsetReturn = NULL;
194227f654740f2a26ad62a5c155af9199af9e69b889claireho                data->offsetStore  = data->offsetBuffer;
1943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
1944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                data->offsetReturn -= 1;
1945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
1946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ((data->extendCEs && data->toReturn > data->extendCEs) ||
1950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            (!data->extendCEs && data->toReturn > data->CEs))
1951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->toReturn -= 1;
1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = *(data->toReturn);
1954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (data->CEs == data->toReturn || data->extendCEs == data->toReturn) {
1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->CEpos = data->toReturn;
1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar ch = 0;
1960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        Loop handles case when incremental normalize switches to or from the
1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        side buffer / original string, and we need to start again to get the
1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        next character.
1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (;;) {
1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (data->flags & UCOL_ITER_HASLEN) {
1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                Normal path for strings when length is specified.
1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                Not in side buffer because it is always null terminated.
1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                */
1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (data->pos <= data->string) {
1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* End of the main source string */
1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return UCOL_NO_MORE_CES;
1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->pos --;
1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch = *data->pos;
1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // we are using an iterator to go back. Pray for us!
1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (data->flags & UCOL_USE_ITERATOR) {
1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UChar32 iterCh = data->iterator->previous(data->iterator);
1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              if(iterCh == U_SENTINEL) {
1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return UCOL_NO_MORE_CES;
1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              } else {
1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch = (UChar)iterCh;
1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              }
1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else {
1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->pos --;
1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch = *data->pos;
1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* we are in the side buffer. */
1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (ch == 0) {
1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /*
1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    At the start of the normalize side buffer.
1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    Go back to string.
1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    Because pointer points to the last accessed character,
1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    hence we have to increment it by one here.
1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    */
1999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    data->flags = data->origFlags;
2000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    data->offsetRepeatValue = 0;
2001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                     if (data->fcdPosition == NULL) {
2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        data->pos = data->string;
2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return UCOL_NO_MORE_CES;
2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    else {
2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        data->pos   = data->fcdPosition + 1;
2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                   continue;
2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(data->flags&UCOL_HIRAGANA_Q) {
2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              if(ch>=0x3040 && ch<=0x309f) {
2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->flags |= UCOL_WAS_HIRAGANA;
2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              } else {
2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->flags &= ~UCOL_WAS_HIRAGANA;
2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              }
2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            * got a character to determine if there's fcd and/or normalization
2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            * stuff to do.
2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            * if the current character is not fcd.
2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            * if current character is at the start of the string
2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            * Trailing combining class == 0.
2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            * Note if pos is in the writablebuffer, norm is always 0
2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            */
2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (ch < ZERO_CC_LIMIT_ ||
2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              // this should propel us out of the loop in the iterator case
2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                (data->flags & UCOL_ITER_NORM) == 0 ||
2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                || data->string == data->pos) {
2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* if next character is FCD */
2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (data->pos == data->string) {
2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* First char of string is always OK for FCD check */
2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Not first char of string, do the FCD fast test */
2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Need a more complete FCD check and possible normalization. */
2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (collPrevIterFCD(data)) {
2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                collPrevIterNormalize(data);
2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*  No normalization. Go ahead and process the char. */
2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            Some normalization happened.
2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            Next loop picks up a char from the normalization buffer.
2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            */
2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* attempt to handle contractions, after removal of the backwards
2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        contraction
2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
2071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (ch <= 0xFF) {
2074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = coll->latinOneMapping[ch];
2075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            else {
2077b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                // Always use UCA for [3400..9FFF], [AC00..D7AF]
2078b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                // **** [FA0E..FA2F] ?? ****
2079b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
2080b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    (ch >= 0x3400 && ch <= 0xD7AF)) {
2081b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    if (ch > 0x9FFF && ch < 0xAC00) {
2082b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // between the two target ranges; do normal lookup
2083b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // **** this range is YI, Modifier tone letters, ****
2084b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
2085b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // **** Latin-D might be tailored, so we need to ****
2086b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // **** do the normal lookup for these guys.     ****
2087b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                         result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
2088b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    } else {
2089b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        result = UCOL_NOT_FOUND;
2090b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    }
2091b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                } else {
2092b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
2093b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                }
2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (result > UCOL_NOT_FOUND) {
2096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
2097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (result == UCOL_NOT_FOUND) { // Not found in master list
2099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (!isAtStartPrevIterate(data) &&
2100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    ucol_contractionEndCP(ch, data->coll))
2101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                {
2102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    result = UCOL_CONTRACTION;
2103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
2104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(coll->UCA) {
2105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
2106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
2108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (result > UCOL_NOT_FOUND) {
2110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(coll->UCA) {
2111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
2112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(result == UCOL_NOT_FOUND) {
2118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result = getPrevImplicit(ch, data);
2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*   ucol_getPrevCE, out-of-line version for use from other files.  */
2127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint32_t  U_EXPORT2
2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getPrevCE(const UCollator *coll, collIterate *data,
2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UErrorCode *status) {
2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ucol_IGetPrevCE(coll, data, status);
2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this should be connected to special Jamo handling */
2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint32_t  U_EXPORT2
2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) {
2137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collIterate colIt;
213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, &u, 1, &colIt, status);
213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return ucol_IGetNextCE(coll, &colIt, status);
2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument character into the end of the buffer pushing back the
2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* null terminator.
2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate struct data
2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch character to be appended
2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the position of the new addition
2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
215350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline const UChar * insertBufferEnd(collIterate *data, UChar ch)
2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
215550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t oldLength = data->writableBuffer.length();
215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return data->writableBuffer.append(ch).getTerminatedBuffer() + oldLength;
2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument string into the end of the buffer pushing back the
2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* null terminator.
2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate struct data
2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param string to be appended
2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param length of the string to be appended
2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the position of the new addition
2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
216850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline const UChar * insertBufferEnd(collIterate *data, const UChar *str, int32_t length)
2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
217050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t oldLength = data->writableBuffer.length();
217150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return data->writableBuffer.append(str, length).getTerminatedBuffer() + oldLength;
2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Special normalization function for contraction in the forwards iterator.
2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This normalization sequence will place the current character at source->pos
2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and its following normalized sequence into the buffer.
2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The fcd position, pos will be changed.
2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* pos will now point to positions in the buffer.
2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Flags will be changed accordingly.
2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data
2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizeNextContraction(collIterate *data)
2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
218650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     strsize;
2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status     = U_ZERO_ERROR;
2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* because the pointer points to the next character */
218950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pStart    = data->pos - 1;
219050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pEnd;
2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
219350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->writableBuffer.setTo(*(pStart - 1));
2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        strsize               = 1;
2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
219750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        strsize = data->writableBuffer.length();
2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pEnd = data->fcdPosition;
2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
220250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->writableBuffer.append(
220350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), status));
220450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(status)) {
220550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
220850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->pos        = data->writableBuffer.getTerminatedBuffer() + strsize;
2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->origFlags  = data->flags;
2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     |= UCOL_ITER_INNORMBUF;
2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Contraction character management function that returns the next character
2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* for the forwards iterator.
2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Does nothing if the next character is in buffer and not the first character
2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* in it.
2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Else it checks next character in data string to see if it is normalizable.
2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* If it is not, the character is simply copied into the buffer, else
2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the whole normalized substring is copied into the buffer, including the
2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* current character.
2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data
2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return next character
2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UChar getNextNormalizedChar(collIterate *data)
2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar  nextch;
2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar  ch;
2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Here we need to add the iterator code. One problem is the way
2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // end of string is handled. If we just return next char, it could
2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // be the sentinel. Most of the cases already check for this, but we
2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // need to be sure.
2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ) {
2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         /* if no normalization and not in buffer. */
2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if(data->flags & UCOL_USE_ITERATOR) {
2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         return (UChar)data->iterator->next(data->iterator);
2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      } else {
2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         return *(data->pos ++);
2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      }
2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (data->flags & UCOL_ITER_NORM && data->flags & UCOL_USE_ITERATOR) {
2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //normalizeIterator(data);
2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((innormbuf && *data->pos != 0) ||
2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (data->fcdPosition != NULL && !innormbuf &&
2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->pos < data->fcdPosition)) {
2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if next character is in normalized buffer, no further normalization
2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        is required
2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *(data->pos ++);
2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->flags & UCOL_ITER_HASLEN) {
2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* in data string */
2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (data->pos + 1 == data->endp) {
2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos ++);
2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (innormbuf) {
2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // inside the normalization buffer, but at the end
2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // (since we encountered zero). This means, in the
2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // case we're using char iterator, that we need to
2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // do another round of normalization.
2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //if(data->origFlags & UCOL_USE_ITERATOR) {
2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // we need to restore original flags,
2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // otherwise, we'll lose them
2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //data->flags = data->origFlags;
2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //normalizeIterator(data);
2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //return *(data->pos++);
2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //} else {
2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            in writable buffer, at this point fcdPosition can not be
2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pointing to the end of the data string. see contracting tag.
2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            */
2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          if(data->fcdPosition) {
2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*(data->fcdPosition + 1) == 0 ||
2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->fcdPosition + 1 == data->endp) {
2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* at the end of the string, dump it into the normalizer */
228650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                data->pos = insertBufferEnd(data, *(data->fcdPosition)) + 1;
2287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Check if data->pos received a null pointer
2288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (data->pos == NULL) {
2289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return (UChar)-1; // Return to indicate error.
2290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return *(data->fcdPosition ++);
2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->pos = data->fcdPosition;
2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          } else if(data->origFlags & UCOL_USE_ITERATOR) {
2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // if we are here, we're using a normalizing iterator.
2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // we should just continue further.
2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->flags = data->origFlags;
2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->pos = NULL;
2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return (UChar)data->iterator->next(data->iterator);
2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          }
2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //}
2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*(data->pos + 1) == 0) {
2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return *(data->pos ++);
2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ch = *data->pos ++;
2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    nextch = *data->pos;
2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * if the current character is not fcd.
2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Trailing combining class == 0.
2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->fcdPosition == NULL || data->fcdPosition < data->pos) &&
2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ ||
2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) {
2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            Need a more complete FCD check and possible normalization.
2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            normalize substring will be appended to buffer
2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            */
2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (collIterFCD(data)) {
2325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            normalizeNextContraction(data);
2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos ++);
2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else if (innormbuf) {
2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* fcdposition shifted even when there's no normalization, if we
2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            don't input the rest into this, we'll get the wrong position when
2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            we reach the end of the writableBuffer */
233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t length = (int32_t)(data->fcdPosition - data->pos + 1);
233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            data->pos = insertBufferEnd(data, data->pos - 1, length);
2334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Check if data->pos received a null pointer
2335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (data->pos == NULL) {
2336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return (UChar)-1; // Return to indicate error.
2337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos ++);
2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (innormbuf) {
2343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        no normalization is to be done hence only one character will be
2345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        appended to the buffer.
2346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
234750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->pos = insertBufferEnd(data, ch) + 1;
2348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Check if data->pos received a null pointer
2349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (data->pos == NULL) {
2350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return (UChar)-1; // Return to indicate error.
2351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
2352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* points back to the pos in string */
2355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ch;
2356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Function to copy the buffer into writableBuffer and sets the fcd position to
2362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the correct position
2363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param source data string source
2364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param buffer character buffer
2365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
236750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &buffer)
2368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* okay confusing part here. to ensure that the skipped characters are
2370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    considered later, we need to place it in the appropriate position in the
2371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    normalization buffer and reassign the pos pointer. simple case if pos
2372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reside in string, simply copy to normalization buffer and
2373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fcdposition = pos, pos = start of normalization buffer. if pos in
2374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    normalization buffer, we'll insert the copy infront of pos and point pos
2375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    to the start of the normalization buffer. why am i doing these copies?
2376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    well, so that the whole chunk of codes in the getNextCE, ucol_prv_getSpecialCE does
2377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    not require any changes, which be really painful. */
2378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (source->flags & UCOL_ITER_INNORMBUF) {
237950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t replaceLength = source->pos - source->writableBuffer.getBuffer();
238050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        source->writableBuffer.replace(0, replaceLength, buffer);
2381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source->fcdPosition  = source->pos;
2384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source->origFlags    = source->flags;
2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source->flags       |= UCOL_ITER_INNORMBUF;
2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source->flags       &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
238750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        source->writableBuffer = buffer;
2388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
239050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    source->pos = source->writableBuffer.getTerminatedBuffer();
2391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Function to get the discontiguos collation element within the source.
2395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Note this function will set the position to the appropriate places.
2396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll current collator used
2397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param source data string source
2398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param constart index to the start character in the contraction table
2399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return discontiguos collation element offset
2400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t getDiscontiguous(const UCollator *coll, collIterate *source,
2403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                const UChar *constart)
2404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* source->pos currently points to the second combining character after
2406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       the start character */
240750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          const UChar *temppos      = source->pos;
240850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          UnicodeString buffer;
2409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar   *tempconstart = constart;
2410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          uint8_t  tempflags    = source->flags;
2411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          UBool    multicontraction = FALSE;
2412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          collIterateState discState;
2413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          backupState(source, &discState);
2415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
241627f654740f2a26ad62a5c155af9199af9e69b889claireho    buffer.setTo(peekCodePoint(source, -1));
2417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
2418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar    *UCharOffset;
2419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar     schar,
2420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  tchar;
2421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t  result;
2422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp)
242427f654740f2a26ad62a5c155af9199af9e69b889claireho            || (peekCodeUnit(source, 0) == 0  &&
2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //|| (*source->pos == 0  &&
2426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ((source->flags & UCOL_ITER_INNORMBUF) == 0 ||
2427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 source->fcdPosition == NULL ||
2428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 source->fcdPosition == source->endp ||
2429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 *(source->fcdPosition) == 0 ||
2430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 u_getCombiningClass(*(source->fcdPosition)) == 0)) ||
2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 /* end of string in null terminated string or stopped by a
2432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 null character, note fcd does not always point to a base
2433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 character after the discontiguos change */
243427f654740f2a26ad62a5c155af9199af9e69b889claireho                 u_getCombiningClass(peekCodePoint(source, 0)) == 0) {
2435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 //u_getCombiningClass(*(source->pos)) == 0) {
2436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //constart = (UChar *)coll->image + getContractOffset(CE);
2437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (multicontraction) {
2438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                source->pos    = temppos - 1;
243950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                setDiscontiguosAttribute(source, buffer);
2440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return *(coll->contractionCEs +
2441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    (tempconstart - coll->contractionIndex));
2442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            constart = tempconstart;
2444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UCharOffset = (UChar *)(tempconstart + 1); /* skip the backward offset*/
2448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        schar = getNextNormalizedChar(source);
2449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (schar > (tchar = *UCharOffset)) {
2451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UCharOffset++;
2452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (schar != tchar) {
2455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* not the correct codepoint. we stuff the current codepoint into
2456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            the discontiguos buffer and try the next character */
245750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer.append(schar);
2458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
2459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
2461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (u_getCombiningClass(schar) ==
246227f654740f2a26ad62a5c155af9199af9e69b889claireho                u_getCombiningClass(peekCodePoint(source, -2))) {
246350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                buffer.append(schar);
2464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
2465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = *(coll->contractionCEs +
2467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      (UCharOffset - coll->contractionIndex));
2468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (result == UCOL_NOT_FOUND) {
2471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          break;
2472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (isContraction(result)) {
2473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* this is a multi-contraction*/
2474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tempconstart = (UChar *)coll->image + getContractOffset(result);
2475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*(coll->contractionCEs + (constart - coll->contractionIndex))
2476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                != UCOL_NOT_FOUND) {
2477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                multicontraction = TRUE;
2478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                temppos       = source->pos + 1;
2479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
248150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            setDiscontiguosAttribute(source, buffer);
2482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return result;
2483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* no problems simply reverting just like that,
2487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if we are in string before getting into this function, points back to
2488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string hence no problem.
2489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if we are in normalization buffer before getting into this function,
2490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    since we'll never use another normalization within this function, we
2491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    know that fcdposition points to a base character. the normalization buffer
2492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    never change, hence this revert works. */
2493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    loadState(source, &discState, TRUE);
2494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    goBackOne(source);
2495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //source->pos   = temppos - 1;
2497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source->flags = tempflags;
2498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *(coll->contractionCEs + (constart - coll->contractionIndex));
2499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* now uses Mark's getImplicitPrimary code */
2502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) {
2504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t r = uprv_uca_getImplicitPrimary(cp);
2505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) | 0x000000C0;
2506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->offsetRepeatCount += 1;
2507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (r & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order'
2508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument character into the front of the buffer replacing the
2512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* front null terminator.
2513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data
2514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch character to be appended
2515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
251750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline void insertBufferFront(collIterate *data, UChar ch)
2518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->pos = data->writableBuffer.setCharAt(0, ch).insert(0, (UChar)0).getTerminatedBuffer() + 2;
2520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Special normalization function for contraction in the previous iterator.
2524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This normalization sequence will place the current character at source->pos
2525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and its following normalized sequence into the buffer.
2526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The fcd position, pos will be changed.
2527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* pos will now point to positions in the buffer.
2528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Flags will be changed accordingly.
2529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data
2530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizePrevContraction(collIterate *data, UErrorCode *status)
2533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pEnd = data->pos + 1;         /* End normalize + 1 */
253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pStart;
2536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString endOfBuffer;
2538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->flags & UCOL_ITER_HASLEN) {
2539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        normalization buffer not used yet, we'll pull down the next
2541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        character into the end of the buffer
2542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
254350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        endOfBuffer.setTo(*pEnd);
2544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        endOfBuffer.setTo(data->writableBuffer, 1);  // after the leading NUL
2547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->fcdPosition == NULL) {
2550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pStart = data->string;
2551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pStart = data->fcdPosition + 1;
2554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
255550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t normLen =
255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)),
255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             data->writableBuffer,
255850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             *status).
255950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length();
256050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
256150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
2562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
2564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    this puts the null termination infront of the normalized string instead
2565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    of the end
2566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
256750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->pos =
256850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminatedBuffer() +
256950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        1 + normLen;
2570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->origFlags  = data->flags;
2571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     |= UCOL_ITER_INNORMBUF;
2572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
2573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Contraction character management function that returns the previous character
2577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* for the backwards iterator.
2578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Does nothing if the previous character is in buffer and not the first
2579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* character in it.
2580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Else it checks previous character in data string to see if it is
2581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* normalizable.
2582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* If it is not, the character is simply copied into the buffer, else
2583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the whole normalized substring is copied into the buffer, including the
2584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* current character.
2585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data
2586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return previous character
2587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status)
2590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar  prevch;
2592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar  ch;
259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *start;
2594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
2595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ||
2596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (innormbuf && *(data->pos - 1) != 0)) {
2597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if no normalization.
2599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if previous character is in normalized buffer, no further normalization
2600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        is required
2601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
2602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if(data->flags & UCOL_USE_ITERATOR) {
2603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->iterator->move(data->iterator, -1, UITER_CURRENT);
2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (UChar)data->iterator->next(data->iterator);
2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      } else {
2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *(data->pos - 1);
2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      }
2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    start = data->pos;
2611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ((data->fcdPosition==NULL)||(data->flags & UCOL_ITER_HASLEN)) {
2612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* in data string */
2613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((start - 1) == data->string) {
2614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(start - 1);
2615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        start --;
2617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ch     = *start;
2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prevch = *(start - 1);
2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        in writable buffer, at this point fcdPosition can not be NULL.
2623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        see contracting tag.
2624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
2625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (data->fcdPosition == data->string) {
2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* at the start of the string, just dump it into the normalizer */
262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            insertBufferFront(data, *(data->fcdPosition));
2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->fcdPosition = NULL;
2629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos - 1);
2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        start  = data->fcdPosition;
2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ch     = *start;
2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prevch = *(start - 1);
2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * if the current character is not fcd.
2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Trailing combining class == 0.
2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->fcdPosition > start &&
2640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       (ch >= NFC_ZERO_CC_BLOCK_LIMIT_ || prevch >= NFC_ZERO_CC_BLOCK_LIMIT_))
2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        Need a more complete FCD check and possible normalization.
2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        normalize substring will be appended to buffer
2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
264650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *backuppos = data->pos;
2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->pos = start;
2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (collPrevIterFCD(data)) {
2649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            normalizePrevContraction(data, status);
2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos - 1);
2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->pos = backuppos;
2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->fcdPosition ++;
2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (innormbuf) {
2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    no normalization is to be done hence only one character will be
2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    appended to the buffer.
2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
266150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        insertBufferFront(data, ch);
2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->fcdPosition --;
2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ch;
2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_prv_getSpecialCE (defined below) handles the special CEs such as contractions, expansions, surrogates and Thai. */
2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* It is called by getNextCE. */
2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2671b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* Maximum digit count for a number (per the macro name); the value must stay even. NOTE(review): the reason for the evenness requirement is not visible here - confirm against the code that uses this limit. */
2672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define UCOL_MAX_DIGITS_FOR_NUMBER 254
2673b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
2674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, collIterate *source, UErrorCode *status) {
2675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collIterateState entryState;
2676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    backupState(source, &entryState);
2677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 cp = ch;
2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for (;;) {
2680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // This loop will repeat only in the case of contractions, and only when a contraction
2681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   is found and the first CE resulting from that contraction is itself a special
2682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   (an expansion, for example.)  All other special CE types are fully handled the
2683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   first time through, and the loop exits.
2684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        const uint32_t *CEOffset = NULL;
2686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        switch(getCETag(CE)) {
2687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case NOT_FOUND_TAG:
2688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* This one is not found, and we'll let somebody else bother about it... no more games */
2689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return CE;
2690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case SPEC_PROC_TAG:
2691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
2692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Special processing is getting a CE that is preceded by a certain prefix
2693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Currently this is only needed for optimizing Japanese length and iteration marks.
2694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // When we encounter a special processing tag, we go backwards and try to see if
2695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // we have a match.
2696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Contraction tables are used - so the whole process is not unlike contraction.
2697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // prefix data is stored backwards in the table.
2698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                const UChar *UCharOffset;
2699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar schar, tchar;
2700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collIterateState prefixState;
2701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                backupState(source, &prefixState);
2702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                loadState(source, &entryState, TRUE);
2703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goBackOne(source); // We want to look at the point where we entered - actually one
2704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // before that...
2705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for(;;) {
2707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // This loop will run once per source string character, for as long as we
2708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //  are matching a potential contraction sequence
2709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // First we position ourselves at the beginning of the contraction sequence
2711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
2712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (collIter_bos(source)) {
2713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
2714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    schar = getPrevNormalizedChar(source, status);
2717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goBackOne(source);
2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
2720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCharOffset++;
2721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (schar == tchar) {
2724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Found the source string char in the table.
2725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  Pick up the corresponding CE from the table.
2726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
2727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (UCharOffset - coll->contractionIndex));
2728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else
2730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    {
2731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Source string char was not in the table.
2732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   We have not found the prefix.
2733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
2734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (ContractionStart - coll->contractionIndex));
2735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isPrefix(CE)) {
2738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // The source string char was in the contraction table, and the corresponding
2739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   CE is not a prefix CE.  We found the prefix, break
2740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   out of loop, this CE will end up being returned.  This is the normal
2741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   way out of prefix handling when the source actually contained
2742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   the prefix.
2743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
2746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE != UCOL_NOT_FOUND) { // we found something and we can merilly continue
2747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    loadState(source, &prefixState, TRUE);
2748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(source->origFlags & UCOL_USE_ITERATOR) {
2749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->flags = source->origFlags;
2750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { // prefix search was a failure, we have to backup all the way to the start
2752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    loadState(source, &entryState, TRUE);
2753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
2755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CONTRACTION_TAG:
2757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
2758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* This should handle contractions */
2759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collIterateState state;
2760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                backupState(source, &state);
2761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t firstCE = *(coll->contractionCEs + ((UChar *)coll->image+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND;
2762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                const UChar *UCharOffset;
2763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar schar, tchar;
2764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for (;;) {
2766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* This loop will run once per source string character, for as long as we     */
2767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*  are matching a potential contraction sequence                  */
2768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* First we position ourselves at the beginning of the contraction sequence */
2770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
2771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (collIter_eos(source)) {
2773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Ran off the end of the source string.
2774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
2775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // So we'll pick whatever we have at the point...
2776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (CE == UCOL_NOT_FOUND) {
2777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // back up the source over all the chars we scanned going into this contraction.
2778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            CE = firstCE;
2779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            loadState(source, &state, TRUE);
2780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(source->origFlags & UCOL_USE_ITERATOR) {
2781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                source->flags = source->origFlags;
2782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
2783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
2784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint8_t maxCC = (uint8_t)(*(UCharOffset)&0xFF); /*get the discontiguos stuff */ /* skip the backward offset, see above */
2788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8);
2789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    schar = getNextNormalizedChar(source);
2791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
2792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCharOffset++;
2793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (schar == tchar) {
2796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Found the source string char in the contraction table.
2797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  Pick up the corresponding CE from the table.
2798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
2799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (UCharOffset - coll->contractionIndex));
2800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else
2802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    {
2803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Source string char was not in contraction table.
2804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   Unless we have a discontiguous contraction, we have finished
2805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   with this contraction.
2806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // in order to do the proper detection, we
2807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // need to see if we're dealing with a supplementary
2808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* We test whether the next two char are surrogate pairs.
2809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        * This test is done if the iterator is not NULL.
2810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        * If there is no surrogate pair, the iterator
2811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        * goes back one if needed. */
2812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UChar32 miss = schar;
2813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (source->iterator) {
2814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UChar32 surrNextChar; /* the next char in the iteration to test */
2815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            int32_t prevPos; /* holds the previous position before move forward of the source iterator */
2816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(U16_IS_LEAD(schar) && source->iterator->hasNext(source->iterator)) {
2817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                prevPos = source->iterator->index;
2818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                surrNextChar = getNextNormalizedChar(source);
2819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (U16_IS_TRAIL(surrNextChar)) {
2820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    miss = U16_GET_SUPPLEMENTARY(schar, surrNextChar);
2821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else if (prevPos < source->iterator->index){
2822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    goBackOne(source);
2823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
2824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
2825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if (U16_IS_LEAD(schar)) {
2826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source));
2827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
2828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        uint8_t sCC;
2830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (miss < 0x300 ||
2831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            maxCC == 0 ||
2832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (sCC = i_getCombiningClass(miss, coll)) == 0 ||
2833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sCC>maxCC ||
2834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (allSame != 0 && sCC == maxCC) ||
2835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            collIter_eos(source))
2836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        {
2837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //  Contraction can not be discontiguous.
2838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goBackOne(source);  // back up the source string by one,
2839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //  because  the character we just looked at was
2840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //  not part of the contraction.   */
2841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(U_IS_SUPPLEMENTARY(miss)) {
2842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goBackOne(source);
2843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
2844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            CE = *(coll->contractionCEs +
2845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                (ContractionStart - coll->contractionIndex));
2846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
2847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //
2848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // Contraction is possibly discontiguous.
2849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //   Scan more of source string looking for a match
2850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //
2851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UChar tempchar;
2852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* find the next character if schar is not a base character
2853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            and we are not yet at the end of the string */
2854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tempchar = getNextNormalizedChar(source);
2855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // probably need another supplementary thingie here
2856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goBackOne(source);
2857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (i_getCombiningClass(tempchar, coll) == 0) {
2858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goBackOne(source);
2859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if(U_IS_SUPPLEMENTARY(miss)) {
2860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    goBackOne(source);
2861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
2862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                /* Spit out the last char of the string, wasn't tasty enough */
2863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                CE = *(coll->contractionCEs +
2864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    (ContractionStart - coll->contractionIndex));
2865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
2866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                CE = getDiscontiguous(coll, source, ContractionStart);
2867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
2868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
2869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } // else after if(schar == tchar)
2870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE == UCOL_NOT_FOUND) {
2872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* The Source string did not match the contraction that we were checking.  */
2873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /*  Back up the source position to undo the effects of having partially    */
2874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /*   scanned through what ultimately proved to not be a contraction.       */
2875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        loadState(source, &state, TRUE);
2876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = firstCE;
2877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isContraction(CE)) {
2881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // The source string char was in the contraction table, and the corresponding
2882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   CE is not a contraction CE.  We completed the contraction, break
2883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   out of loop, this CE will end up being returned.  This is the normal
2884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   way out of contraction handling when the source actually contained
2885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   the contraction.
2886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // The source string char was in the contraction table, and the corresponding
2891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   CE IS a contraction CE.  We will continue looping to check the source
2892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   string for the remaining chars in the contraction.
2893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));
2894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tempCE != UCOL_NOT_FOUND) {
2895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // We have scanned a section of source string for which there is a
2896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  CE from the contraction table.  Remember the CE and scan position, so
2897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  that we can return to this point if further scanning fails to
2898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  match a longer contraction sequence.
2899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        firstCE = tempCE;
2900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goBackOne(source);
2902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        backupState(source, &state);
2903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        getNextNormalizedChar(source);
2904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Another way to do this is:
2906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //collIterateState tempState;
2907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //backupState(source, &tempState);
2908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //goBackOne(source);
2909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //backupState(source, &state);
2910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //loadState(source, &tempState, TRUE);
2911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // The problem is that for incomplete contractions we have to remember the previous
2913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // position. Before, the only thing I needed to do was state.pos--;
2914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // After iterator introduction and especially after introduction of normalizing
2915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // iterators, it became much more difficult to decrease the saved state.
2916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // I'm not yet sure which of the two methods above is faster.
2917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } // for(;;)
2919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
2920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } // case CONTRACTION_TAG:
2921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case LONG_PRIMARY_TAG:
2922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
2923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
2924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
2925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->offsetRepeatCount += 1;
2926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return CE;
2927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case EXPANSION_TAG:
2929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
2930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* This should handle expansion. */
2931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* NOTE: we can encounter both continuations and expansions in an expansion! */
2932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* I have to decide where continuations are going to be dealt with */
2933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t size;
2934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t i;    /* general counter */
2935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
2937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                size = getExpansionCount(CE);
2938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = *CEOffset++;
293927f654740f2a26ad62a5c155af9199af9e69b889claireho              //source->offsetRepeatCount = -1;
2940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
2942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    for(i = 1; i<size; i++) {
2943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = *CEOffset++;
294427f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->offsetRepeatCount += 1;
2945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* else, we do */
2947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(*CEOffset != 0) {
2948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = *CEOffset++;
294927f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->offsetRepeatCount += 1;
2950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return CE;
2954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case DIGIT_TAG:
2956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
2957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
2958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                We do a check to see if we want to collate digits as numbers; if so we generate
2959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                a custom collation key. Otherwise we pull out the value stored in the expansion table.
2960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
2961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //uint32_t size;
2962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t i;    /* general counter */
2963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->coll->numericCollation == UCOL_ON){
2965b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    collIterateState digitState = {0,0,0,0,0,0,0,0,0};
2966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UChar32 char32 = 0;
2967b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    int32_t digVal = 0;
2968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t digIndx = 0;
2970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t endIndex = 0;
2971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t trailingZeroIndex = 0;
2972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint8_t collateVal = 0;
2974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UBool nonZeroValReached = FALSE;
2976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2977b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3]; // I just need a temporary place to store my generated CEs.
2978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
2979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                         We parse the source string until we hit a char that's NOT a digit.
2980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        Use this u_charDigitValue. This might be slow because we have to
2981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        handle surrogates...
2982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
2983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /*
2984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (U16_IS_LEAD(ch)){
2985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                      if (!collIter_eos(source)) {
2986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        backupState(source, &digitState);
2987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UChar trail = getNextNormalizedChar(source);
2988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(U16_IS_TRAIL(trail)) {
2989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                          char32 = U16_GET_SUPPLEMENTARY(ch, trail);
2990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
2991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                          loadState(source, &digitState, TRUE);
2992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                          char32 = ch;
2993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
2994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                      } else {
2995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        char32 = ch;
2996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                      }
2997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
2998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      char32 = ch;
2999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    digVal = u_charDigitValue(char32);
3001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            */
3002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    digVal = u_charDigitValue(cp); // if we have arrived here, we have
3003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // already processed possible supplementaries that triggered the digit tag -
3004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // all supplementaries are marked in the UCA.
3005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        We  pad a zero in front of the first element anyways. This takes
3007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        care of the (probably) most common case where people are sorting things followed
3008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        by a single digit
3009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    digIndx++;
3011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    for(;;){
3012b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // Make sure we have enough space. No longer needed;
3013b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // at this point digIndx now has a max value of UCOL_MAX_DIGITS_FOR_NUMBER
3014b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // (it has been pre-incremented) so we just ensure that numTempBuf is big enough
3015b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3).
3016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Skipping over leading zeroes.
3018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (digVal != 0) {
3019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            nonZeroValReached = TRUE;
3020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (nonZeroValReached) {
3022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /*
3023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            We parse the digit string into base 100 numbers (this fits into a byte).
3024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            Each buffer byte holds a pair of decimal digits: when digIndx is even, the
3025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            digit serves as the 'tens' digit, while when digIndx is odd, it is the 'ones'
3026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            digit that completes the pair. We dump the parsed base 100 value (collateVal)
3027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            into a buffer. We multiply each collateVal by 2 (to give us room) and add 6 (to avoid
3028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            overlapping magic CE byte values). After the loop, we subtract 1 from the last byte
3029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            to ensure it is less than all the other bytes.
3030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            */
3031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (digIndx % 2 == 1){
3033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                collateVal += (uint8_t)digVal;
3034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // We don't enter the low-order-digit case unless we've already seen
3036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // the high order, or for the first digit, which is always non-zero.
3037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (collateVal != 0)
3038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    trailingZeroIndex = 0;
3039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
3041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                collateVal = 0;
3042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            else{
3044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // We drop the collation value into the buffer so if we need to do
3045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // a "front patch" we don't have to check to see if we're hitting the
3046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // last element.
3047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                collateVal = (uint8_t)(digVal * 10);
3048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // Check for trailing zeroes.
3050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (collateVal == 0)
3051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                {
3052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if (!trailingZeroIndex)
3053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        trailingZeroIndex = (digIndx/2) + 2;
3054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                else
3056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    trailingZeroIndex = 0;
3057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
3059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            digIndx++;
3061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Get next character.
3064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (!collIter_eos(source)){
3065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            ch = getNextNormalizedChar(source);
3066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (U16_IS_LEAD(ch)){
3067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (!collIter_eos(source)) {
3068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    backupState(source, &digitState);
3069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    UChar trail = getNextNormalizedChar(source);
3070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(U16_IS_TRAIL(trail)) {
3071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        char32 = U16_GET_SUPPLEMENTARY(ch, trail);
3072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    } else {
3073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        loadState(source, &digitState, TRUE);
3074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        char32 = ch;
3075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
3076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
3078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                char32 = ch;
3079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3081b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if ((digVal = u_charDigitValue(char32)) == -1 || digIndx > UCOL_MAX_DIGITS_FOR_NUMBER){
3082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // Resetting position to point to the next unprocessed char. We
3083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // overshot it when doing our test/set for numbers.
3084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (char32 > 0xFFFF) { // For surrogates.
3085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    loadState(source, &digitState, TRUE);
3086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    //goBackOne(source);
3087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goBackOne(source);
3089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                break;
3090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
3092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            break;
3093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (nonZeroValReached == FALSE){
3097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        digIndx = 2;
3098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        numTempBuf[2] = 6;
3099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;
3102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (digIndx % 2 != 0){
3103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /*
3104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        We missed a value. Since digIndx isn't even, we stuck too many values into the buffer (this is what
3105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward.
3106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a
3107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        single pass and optimizes for strings with single digits. I'm just assuming that's the more common case.
3108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        */
3109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        for(i = 2; i < endIndex; i++){
3111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            numTempBuf[i] =     (((((numTempBuf[i] - 6)/2) % 10) * 10) +
3112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
3113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        --digIndx;
3115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Subtract one off of the last byte.
3118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[endIndex-1] -= 1;
3119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    We want to skip over the first two slots in the buffer. The first slot
3122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
3123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
3124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
3126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
3127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Now transfer the collation key to our collIterate struct.
3129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
3130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //size = ((endIndex+1) & ~1)/2;
3131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
3132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
3133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_BYTE_COMMON; // Tertiary weight.
3134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    i = 2; // Reset the index into the buffer.
3135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(i < endIndex)
3136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    {
3137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        uint32_t primWeight = numTempBuf[i++] << 8;
3138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if ( i < endIndex)
3139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            primWeight |= numTempBuf[i++];
3140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
3141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // no numeric mode, we'll just switch to whatever we stashed and continue
3145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
3146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = *CEOffset++;
3147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
3148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return CE;
3150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* various implicits optimization */
3152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case IMPLICIT_TAG:        /* everything that is not defined otherwise */
3153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* UCA is filled with these. Tailorings are NOT_FOUND */
3154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return getImplicit(cp, source);
3155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
3156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImplicit
3157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return getImplicit(cp, source);
3158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
3159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t
3161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
3162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t LCount = 19;
3163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t VCount = 21;
3164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t TCount = 28;
3165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t NCount = VCount * TCount;   // 588
3166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t SCount = LCount * NCount;   // 11172
3167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t L = ch - SBase;
3168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // divide into pieces
3170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t T = L % TCount; // we do it in this order since some compilers can do % and / in one operation
3172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L /= TCount;
3173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t V = L % VCount;
3174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L /= VCount;
3175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // offset them
3177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L += LBase;
3179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                V += VBase;
3180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                T += TBase;
3181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // return the first CE, but first put the rest into the expansion buffer
3183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (!source->coll->image->jamoSpecial) { // FAST PATH
3184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
3186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (T != TBase) {
3187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
3188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
3191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { // Jamo is Special
3193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Since Hanguls pass the FCD check, it is
3194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // guaranteed that we won't be in
3195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // the normalization buffer if something like this happens
3196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // However, if we are using a uchar iterator and normalization
3197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // is ON, the Hangul that led us here is going to be in that
3198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // normalization buffer. Here we want to restore the uchar
3199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // iterator state and pull out of the normalization buffer
3200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) {
3201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->flags = source->origFlags; // restore the iterator
3202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->pos = NULL;
3203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Move Jamos into normalization buffer
320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar *buffer = source->writableBuffer.getBuffer(4);
320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t bufferLength;
320750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    buffer[0] = (UChar)L;
320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    buffer[1] = (UChar)V;
3209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (T != TBase) {
321050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        buffer[2] = (UChar)T;
321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        bufferLength = 3;
3212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        bufferLength = 2;
3214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    source->writableBuffer.releaseBuffer(bufferLength);
3216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->fcdPosition       = source->pos;   // Indicate where to continue in main input string
3218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   after exhausting the writableBuffer
321950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    source->pos   = source->writableBuffer.getTerminatedBuffer();
3220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->origFlags   = source->flags;
3221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->flags       |= UCOL_ITER_INNORMBUF;
3222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->flags       &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
3223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return(UCOL_IGNORABLE);
3225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case SURROGATE_TAG:
3228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* we encountered a leading surrogate. We shall get the CE by using the following code unit */
3229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* two things can happen here: next code point can be a trailing surrogate - we will use it */
3230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* to retrieve the CE, or it is not a trailing surrogate (or the string is done). In that case */
323127f654740f2a26ad62a5c155af9199af9e69b889claireho            /* we treat it like an unassigned code point. */
3232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar trail;
3234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collIterateState state;
3235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                backupState(source, &state);
3236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
3237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // we chould have stepped one char forward and it might have turned that it
3238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // was not a trail surrogate. In that case, we have to backup.
3239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    loadState(source, &state, TRUE);
324027f654740f2a26ad62a5c155af9199af9e69b889claireho                    return UCOL_NOT_FOUND;
3241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* TODO: CE contain the data from the previous CE + the mask. It should at least be unmasked */
3243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFFFF, trail);
3244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates in this block, but not this one.
3245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // We need to backup
3246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        loadState(source, &state, TRUE);
3247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return CE;
3248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // calculate the supplementary code point value, if surrogate was not tailored
3250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
3251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
3254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case LEAD_SURROGATE_TAG:  /* D800-DBFF*/
3255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UChar nextChar;
3256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if( source->flags & UCOL_USE_ITERATOR) {
3257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source->iterator))) {
3258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
3259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->iterator->next(source->iterator);
3260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return getImplicit(cp, source);
3261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) || (source->pos<source->endp)) &&
326327f654740f2a26ad62a5c155af9199af9e69b889claireho                      U_IS_TRAIL((nextChar=*source->pos))) {
326427f654740f2a26ad62a5c155af9199af9e69b889claireho                cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
326527f654740f2a26ad62a5c155af9199af9e69b889claireho                source->pos++;
326627f654740f2a26ad62a5c155af9199af9e69b889claireho                return getImplicit(cp, source);
3267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
326827f654740f2a26ad62a5c155af9199af9e69b889claireho            return UCOL_NOT_FOUND;
3269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
327027f654740f2a26ad62a5c155af9199af9e69b889claireho            return UCOL_NOT_FOUND; /* broken surrogate sequence */
3271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CHARSET_TAG:
3272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* not yet implemented */
3273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* probably after 1.8 */
3274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return UCOL_NOT_FOUND;
3275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        default:
3276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_INTERNAL_PROGRAM_ERROR;
3277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE=0;
3278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
3279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (CE <= UCOL_NOT_FOUND) break;
3281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return CE;
3283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* now uses Mark's getImplicitPrimary code */
3287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
3288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {
3289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t r = uprv_uca_getImplicitPrimary(cp);
3290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505;
3292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->toReturn = collationSource->CEpos;
3293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
329427f654740f2a26ad62a5c155af9199af9e69b889claireho    // **** doesn't work if using iterator ****
329527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (collationSource->flags & UCOL_ITER_INNORMBUF) {
329627f654740f2a26ad62a5c155af9199af9e69b889claireho        collationSource->offsetRepeatCount = 1;
329727f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
329827f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t firstOffset = (int32_t)(collationSource->pos - collationSource->string);
3299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
330027f654740f2a26ad62a5c155af9199af9e69b889claireho        UErrorCode errorCode = U_ZERO_ERROR;
330127f654740f2a26ad62a5c155af9199af9e69b889claireho        collationSource->appendOffset(firstOffset, errorCode);
330227f654740f2a26ad62a5c155af9199af9e69b889claireho        collationSource->appendOffset(firstOffset + 1, errorCode);
3303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
330427f654740f2a26ad62a5c155af9199af9e69b889claireho        collationSource->offsetReturn = collationSource->offsetStore - 1;
330527f654740f2a26ad62a5c155af9199af9e69b889claireho        *(collationSource->offsetBuffer) = firstOffset;
330627f654740f2a26ad62a5c155af9199af9e69b889claireho        if (collationSource->offsetReturn == collationSource->offsetBuffer) {
330727f654740f2a26ad62a5c155af9199af9e69b889claireho            collationSource->offsetStore = collationSource->offsetBuffer;
330827f654740f2a26ad62a5c155af9199af9e69b889claireho        }
330927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
3310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return ((r & 0x0000FFFF)<<16) | 0x000000C0;
3312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
3315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function handles the special CEs like contractions, expansions,
3316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * surrogates, Thai.
 * It is called by getPrevCE.
3318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
3319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
3320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          collIterate *source,
3321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          UErrorCode *status)
3322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
3323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const uint32_t *CEOffset    = NULL;
3324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          UChar    *UCharOffset = NULL;
3325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          UChar    schar;
3326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UChar    *constart    = NULL;
3327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          uint32_t size;
3328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          UChar    buffer[UCOL_MAX_BUFFER];
3329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          uint32_t *endCEBuffer;
3330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          UChar   *strbuffer;
3331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          int32_t noChars = 0;
3332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          int32_t CECount = 0;
3333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for(;;)
3335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
3336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* the only ces that loops are thai and contractions */
3337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        switch (getCETag(CE))
3338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
3339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case NOT_FOUND_TAG:  /* this tag always returns */
3340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return CE;
3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case SPEC_PROC_TAG:
3343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Special processing is getting a CE that is preceded by a certain prefix
3345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Currently this is only needed for optimizing Japanese length and iteration marks.
3346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // When we encouter a special processing tag, we go backwards and try to see if
3347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // we have a match.
3348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Contraction tables are used - so the whole process is not unlike contraction.
3349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // prefix data is stored backwards in the table.
3350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                const UChar *UCharOffset;
3351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar schar, tchar;
3352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collIterateState prefixState;
3353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                backupState(source, &prefixState);
3354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for(;;) {
3355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // This loop will run once per source string character, for as long as we
3356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //  are matching a potential contraction sequence
3357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // First we position ourselves at the begining of contraction sequence
3359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
3360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (collIter_bos(source)) {
3362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
3363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
3364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    schar = getPrevNormalizedChar(source, status);
3366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goBackOne(source);
3367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
3369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCharOffset++;
3370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (schar == tchar) {
3373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Found the source string char in the table.
3374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  Pick up the corresponding CE from the table.
3375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
3376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (UCharOffset - coll->contractionIndex));
3377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else
3379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    {
3380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // if there is a completely ignorable code point in the middle of
3381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // a prefix, we need to act as if it's not there
3382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero)
3383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // lone surrogates cannot be set to zero as it would break other processing
3384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
3385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // it's easy for BMP code points
3386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(isZeroCE == 0) {
3387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
338827f654740f2a26ad62a5c155af9199af9e69b889claireho                        } else if(U16_IS_SURROGATE(schar)) {
3389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // for supplementary code points, we have to check the next one
3390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // situations where we are going to ignore
3391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // 1. beginning of the string: schar is a lone surrogate
3392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // 2. schar is a lone surrogate
3393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // 3. schar is a trail surrogate in a valid surrogate sequence
3394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //    that is explicitly set to zero.
3395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (!collIter_bos(source)) {
3396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                UChar lead;
339727f654740f2a26ad62a5c155af9199af9e69b889claireho                                if(!U16_IS_SURROGATE_LEAD(schar) && U16_IS_LEAD(lead = getPrevNormalizedChar(source, status))) {
3398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, lead);
339927f654740f2a26ad62a5c155af9199af9e69b889claireho                                    if(isSpecial(isZeroCE) && getCETag(isZeroCE) == SURROGATE_TAG) {
3400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar);
3401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        if(finalCE == 0) {
3402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                            // this is a real, assigned completely ignorable code point
3403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                            goBackOne(source);
3404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                            continue;
3405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        }
3406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
3407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
340827f654740f2a26ad62a5c155af9199af9e69b889claireho                                    // lone surrogate, treat like unassigned
340927f654740f2a26ad62a5c155af9199af9e69b889claireho                                    return UCOL_NOT_FOUND;
3410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
341227f654740f2a26ad62a5c155af9199af9e69b889claireho                                // lone surrogate at the beggining, treat like unassigned
341327f654740f2a26ad62a5c155af9199af9e69b889claireho                                return UCOL_NOT_FOUND;
3414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Source string char was not in the table.
3417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   We have not found the prefix.
3418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
3419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (ContractionStart - coll->contractionIndex));
3420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isPrefix(CE)) {
3423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // The source string char was in the contraction table, and the corresponding
3424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   CE is not a prefix CE.  We found the prefix, break
3425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   out of loop, this CE will end up being returned.  This is the normal
3426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   way out of prefix handling when the source actually contained
3427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   the prefix.
3428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
3429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                loadState(source, &prefixState, TRUE);
3432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
3433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
343550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case CONTRACTION_TAG: {
3436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* to ensure that the backwards and forwards iteration matches, we
3437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            take the current region of most possible match and pass it through
3438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            the forward iteration. this will ensure that the obstinate problem of
3439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            overlapping contractions will not occur.
3440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            */
344127f654740f2a26ad62a5c155af9199af9e69b889claireho            schar = peekCodeUnit(source, 0);
3442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            constart = (UChar *)coll->image + getContractOffset(CE);
3443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (isAtStartPrevIterate(source)
3444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* commented away contraction end checks after adding the checks
3445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                in getPrevCE  */) {
3446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* start of string or this is not the end of any contraction */
3447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = *(coll->contractionCEs +
3448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        (constart - coll->contractionIndex));
3449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
3450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            strbuffer = buffer;
3452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1);
3453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *(UCharOffset --) = 0;
3454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            noChars = 0;
3455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // have to swap thai characters
3456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while (ucol_unsafeCP(schar, coll)) {
3457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(UCharOffset) = schar;
3458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                noChars++;
3459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UCharOffset --;
3460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                schar = getPrevNormalizedChar(source, status);
3461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goBackOne(source);
3462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // TODO: when we exhaust the contraction buffer,
3463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // it needs to get reallocated. The problem is
3464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // that the size depends on the string which is
3465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // not iterated over. However, since we're travelling
3466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // backwards, we already had to set the iterator at
3467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // the end - so we might as well know where we are?
3468c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (UCharOffset + 1 == buffer) {
3469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* we have exhausted the buffer */
3470c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    int32_t newsize = 0;
3471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(source->pos) { // actually dealing with a position
347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        newsize = (int32_t)(source->pos - source->string + 1);
3473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else { // iterator
3474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        newsize = 4 * UCOL_MAX_BUFFER;
3475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    strbuffer = (UChar *)uprv_malloc(sizeof(UChar) *
3477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        (newsize + UCOL_MAX_BUFFER));
3478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* test for NULL */
3479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (strbuffer == NULL) {
3480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_MEMORY_ALLOCATION_ERROR;
3481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_NO_MORE_CES;
3482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UCharOffset = strbuffer + newsize;
3484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uprv_memcpy(UCharOffset, buffer,
3485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_MAX_BUFFER * sizeof(UChar));
3486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UCharOffset --;
3487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if ((source->pos && (source->pos == source->string ||
3489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    ((source->flags & UCOL_ITER_INNORMBUF) &&
3490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->pos - 1) == 0 && source->fcdPosition == NULL)))
3491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    || (source->iterator && !source->iterator->hasPrevious(source->iterator))) {
3492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
3493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* adds the initial base character to the string */
3496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *(UCharOffset) = schar;
3497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            noChars++;
3498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            int32_t offsetBias;
3500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // **** doesn't work if using iterator ****
3502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (source->flags & UCOL_ITER_INNORMBUF) {
3503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                offsetBias = -1;
3504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
3505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                offsetBias = (int32_t)(source->pos - source->string);
3506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* a new collIterate is used to simplify things, since using the current
3509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            collIterate will mean that the forward and backwards iteration will
3510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            share and change the same buffers. we don't want to get into that. */
3511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            collIterate temp;
3512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            int32_t rawOffset;
3513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
351450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            IInit_collIterate(coll, UCharOffset, noChars, &temp, status);
351550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(U_FAILURE(*status)) {
351650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return UCOL_NULLORDER;
351750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
3518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            temp.flags &= ~UCOL_ITER_NORM;
3519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            temp.flags |= source->flags & UCOL_FORCE_HAN_IMPLICIT;
3520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
352150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            rawOffset = (int32_t)(temp.pos - temp.string); // should always be zero?
3522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE = ucol_IGetNextCE(coll, &temp, status);
3523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (source->extendCEs) {
3525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                endCEBuffer = source->extendCEs + source->extendCEsSize;
352650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                CECount = (int32_t)((source->CEpos - source->extendCEs)/sizeof(uint32_t));
3527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
3528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE;
352950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                CECount = (int32_t)((source->CEpos - source->CEs)/sizeof(uint32_t));
3530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while (CE != UCOL_NO_MORE_CES) {
3533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->CEpos ++) = CE;
3534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (offsetBias >= 0) {
353627f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->appendOffset(rawOffset + offsetBias, *status);
3537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CECount++;
3540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->CEpos == endCEBuffer) {
3541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* ran out of CE space, reallocate to new buffer.
3542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    If reallocation fails, reset pointers and bail out,
3543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    there's no guarantee of the right character position after
3544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    this bail*/
354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (!increaseCEsCapacity(source)) {
3546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_MEMORY_ALLOCATION_ERROR;
354727f654740f2a26ad62a5c155af9199af9e69b889claireho                        break;
3548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endCEBuffer = source->extendCEs + source->extendCEsSize;
3551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) {
355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    rawOffset = (int32_t)(temp.fcdPosition - temp.string);
3555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                } else {
355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    rawOffset = (int32_t)(temp.pos - temp.string);
3557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                }
3558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &temp, status);
3560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
356227f654740f2a26ad62a5c155af9199af9e69b889claireho            if (strbuffer != buffer) {
356327f654740f2a26ad62a5c155af9199af9e69b889claireho                uprv_free(strbuffer);
356427f654740f2a26ad62a5c155af9199af9e69b889claireho            }
356527f654740f2a26ad62a5c155af9199af9e69b889claireho            if (U_FAILURE(*status)) {
356627f654740f2a26ad62a5c155af9199af9e69b889claireho                return (uint32_t)UCOL_NULLORDER;
356727f654740f2a26ad62a5c155af9199af9e69b889claireho            }
356827f654740f2a26ad62a5c155af9199af9e69b889claireho
356927f654740f2a26ad62a5c155af9199af9e69b889claireho            if (source->offsetRepeatValue != 0) {
3570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (CECount > noChars) {
357127f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->offsetRepeatCount += temp.offsetRepeatCount;
3572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // **** does this really skip the right offsets? ****
3574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetReturn -= (noChars - CECount);
3575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (offsetBias >= 0) {
3579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->offsetReturn = source->offsetStore - 1;
3580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->offsetReturn == source->offsetBuffer) {
3581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetStore = source->offsetBuffer;
3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            source->toReturn = source->CEpos - 1;
3586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (source->toReturn == source->CEs) {
3587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->CEpos = source->CEs;
3588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *(source->toReturn);
359150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
3592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case LONG_PRIMARY_TAG:
3593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
3595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
3596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->toReturn = source->CEpos - 1;
3597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
359827f654740f2a26ad62a5c155af9199af9e69b889claireho                if (source->flags & UCOL_ITER_INNORMBUF) {
3599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetRepeatCount = 1;
360027f654740f2a26ad62a5c155af9199af9e69b889claireho                } else {
360127f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t firstOffset = (int32_t)(source->pos - source->string);
3602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
360327f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->appendOffset(firstOffset, *status);
360427f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->appendOffset(firstOffset + 1, *status);
3605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
360627f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->offsetReturn = source->offsetStore - 1;
360727f654740f2a26ad62a5c155af9199af9e69b889claireho                    *(source->offsetBuffer) = firstOffset;
360827f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (source->offsetReturn == source->offsetBuffer) {
360927f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->offsetStore = source->offsetBuffer;
361027f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
361127f654740f2a26ad62a5c155af9199af9e69b889claireho                }
3612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return *(source->toReturn);
3615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case EXPANSION_TAG: /* this tag always returns */
3618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /*
3620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            This should handle expansion.
3621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            NOTE: we can encounter both continuations and expansions in an expansion!
3622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            I have to decide where continuations are going to be dealt with
3623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            */
3624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            int32_t firstOffset = (int32_t)(source->pos - source->string);
3625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // **** doesn't work if using iterator ****
3627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (source->offsetReturn != NULL) {
3628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (! (source->flags & UCOL_ITER_INNORMBUF) && source->offsetReturn == source->offsetBuffer) {
3629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetStore = source->offsetBuffer;
3630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }else {
3631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                  firstOffset = -1;
3632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* find the offset to expansion table */
3636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
3637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            size     = getExpansionCount(CE);
3638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (size != 0) {
3639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
3640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if there are less than 16 elements in expansion, we don't terminate
3641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
3642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t count;
3643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for (count = 0; count < size; count++) {
3645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos ++) = *CEOffset++;
3646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (firstOffset >= 0) {
364827f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->appendOffset(firstOffset + 1, *status);
3649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
3652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* else, we do */
3653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (*CEOffset != 0) {
3654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos ++) = *CEOffset ++;
3655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (firstOffset >= 0) {
365727f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->appendOffset(firstOffset + 1, *status);
3658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (firstOffset >= 0) {
3663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->offsetReturn = source->offsetStore - 1;
3664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->offsetBuffer) = firstOffset;
3665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->offsetReturn == source->offsetBuffer) {
3666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetStore = source->offsetBuffer;
3667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
3669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->offsetRepeatCount += size - 1;
3670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            source->toReturn = source->CEpos - 1;
3673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // in case of one element expansion, we
3674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // want to immediately return CEpos
3675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(source->toReturn == source->CEs) {
3676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->CEpos = source->CEs;
3677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *(source->toReturn);
3680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case DIGIT_TAG:
3683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
3685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                We do a check to see if we want to collate digits as numbers; if so we generate
3686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                a custom collation key. Otherwise we pull out the value stored in the expansion table.
3687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
3688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t i;    /* general counter */
3689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->coll->numericCollation == UCOL_ON){
3691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t digIndx = 0;
3692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t endIndex = 0;
3693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t leadingZeroIndex = 0;
3694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t trailingZeroCount = 0;
3695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint8_t collateVal = 0;
3697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UBool nonZeroValReached = FALSE;
3699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2]; // I just need a temporary place to store my generated CEs.
3701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    We parse the source string until we hit a char that's NOT a digit.
3703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    Use this u_charDigitValue. This might be slow because we have to
3704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    handle surrogates...
3705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    /*
3707b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    We need to break up the digit string into collection elements of UCOL_MAX_DIGITS_FOR_NUMBER or less,
3708b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    with any chunks smaller than that being on the right end of the digit string - i.e. the first collation
3709b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    element we process when going backward. To determine how long that chunk might be, we may need to make
3710b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    two passes through the loop that collects digits - one to see how long the string is (and how much is
3711b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    leading zeros) to determine the length of that right-hand chunk, and a second (if the whole string has
3712b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    more than UCOL_MAX_DIGITS_FOR_NUMBER non-leading-zero digits) to actually process that collation
3713b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    element chunk after resetting the state to the initialState at the right side of the digit string.
3714b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    */
3715b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    uint32_t ceLimit = 0;
3716b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    UChar initial_ch = ch;
3717b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    collIterateState initialState = {0,0,0,0,0,0,0,0,0};
3718b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    backupState(source, &initialState);
3719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3720b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    for(;;) {
3721b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        collIterateState state = {0,0,0,0,0,0,0,0,0};
3722b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        UChar32 char32 = 0;
3723b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        int32_t digVal = 0;
3724b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3725b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        if (U16_IS_TRAIL (ch)) {
3726b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if (!collIter_bos(source)){
3727b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                UChar lead = getPrevNormalizedChar(source, status);
3728b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                if(U16_IS_LEAD(lead)) {
3729b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    char32 = U16_GET_SUPPLEMENTARY(lead,ch);
3730b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    goBackOne(source);
3731b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                } else {
3732b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    char32 = ch;
3733b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                }
3734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
3735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                char32 = ch;
3736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
3738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            char32 = ch;
3739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
3740b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        digVal = u_charDigitValue(char32);
3741b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3742b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        for(;;) {
3743b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // Make sure we have enough space. No longer needed;
3744b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // at this point the largest value of digIndx when we need to save data in numTempBuf
3745b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // is UCOL_MAX_DIGITS_FOR_NUMBER-1 (digIndx is post-incremented) so we just ensure
3746b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // that numTempBuf is big enough (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2).
3747b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3748b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // Skip over trailing zeroes, and keep a count of them.
3749b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if (digVal != 0)
3750b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                nonZeroValReached = TRUE;
3751b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3752b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if (nonZeroValReached) {
3753b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                /*
3754b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                We parse the digit string into base 100 numbers (this fits into a byte).
3755b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                We only add to the buffer in twos, thus if we are parsing an odd character,
3756b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                that serves as the 'tens' digit while the if we are parsing an even one, that
3757b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
3758b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
3759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
3760b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                than all the other bytes.
3761b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3762b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                Since we're doing in this reverse we want to put the first digit encountered into the
3763b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                ones place and the second digit encountered into the tens place.
3764b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                */
3765b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3766b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                if ((digIndx + trailingZeroCount) % 2 == 1) {
3767b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // High-order digit case (tens place)
3768b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    collateVal += (uint8_t)(digVal * 10);
3769b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3770b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // We cannot set leadingZeroIndex unless it has been set for the
3771b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // low-order digit. Therefore, all we can do for the high-order
3772b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // digit is turn it off, never on.
3773b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // The only time we will have a high digit without a low is for
3774b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // the very first non-zero digit, so no zero check is necessary.
3775b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if (collateVal != 0)
3776b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        leadingZeroIndex = 0;
3777b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3778b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // The first pass through, digIndx may exceed the limit, but in that case
3779b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // we no longer care about numTempBuf contents since they will be discarded
3780b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) {
3781b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
3782b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    }
3783b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    collateVal = 0;
3784b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                } else {
3785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // Low-order digit case (ones place)
3786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    collateVal = (uint8_t)digVal;
3787b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // Check for leading zeroes.
3789b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if (collateVal == 0) {
3790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        if (!leadingZeroIndex)
3791b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                            leadingZeroIndex = (digIndx/2) + 2;
3792b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    } else
3793b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        leadingZeroIndex = 0;
3794b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3795b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // No need to write to buffer; the case of a last odd digit
3796b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // is handled below.
3797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3798b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                ++digIndx;
3799b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            } else
3800b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                ++trailingZeroCount;
3801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if (!collIter_bos(source)) {
3803b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                ch = getPrevNormalizedChar(source, status);
3804b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                //goBackOne(source);
3805b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                if (U16_IS_TRAIL(ch)) {
3806b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    backupState(source, &state);
3807b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if (!collIter_bos(source)) {
3808b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        goBackOne(source);
3809b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        UChar lead = getPrevNormalizedChar(source, status);
3810b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        if(U16_IS_LEAD(lead)) {
3812b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                            char32 = U16_GET_SUPPLEMENTARY(lead,ch);
3813b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        } else {
3814b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                            loadState(source, &state, FALSE);
3815b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                            char32 = ch;
3816b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        }
3817b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    }
3818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else
3819b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    char32 = ch;
3820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3821b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                if ((digVal = u_charDigitValue(char32)) == -1 || (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) {
3822b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if (char32 > 0xFFFF) {// For surrogates.
3823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        loadState(source, &state, FALSE);
3824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
3825b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // Don't need to "reverse" the goBackOne call,
3826b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // as this points to the next position to process..
3827b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    //if (char32 > 0xFFFF) // For surrogates.
3828b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    //getNextNormalizedChar(source);
3829b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    break;
3830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3832b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                goBackOne(source);
3833b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            }else
3834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                break;
3835b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        }
3836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3837b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_NUMBER) {
3838b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // our collation element is not too big, go ahead and finish with it
3839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            break;
3840b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        }
3841b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // our digit string is too long for a collation element;
3842b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // set the limit for it, reset the state and begin again
3843b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGITS_FOR_NUMBER;
3844b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        if ( ceLimit == 0 ) {
3845b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER;
3846b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        }
3847b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        ch = initial_ch;
3848b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        loadState(source, &initialState, FALSE);
3849b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        digIndx = endIndex = leadingZeroIndex = trailingZeroCount = 0;
3850b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        collateVal = 0;
3851b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        nonZeroValReached = FALSE;
3852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (! nonZeroValReached) {
3855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        digIndx = 2;
3856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        trailingZeroCount = 0;
3857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        numTempBuf[2] = 6;
3858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if ((digIndx + trailingZeroCount) % 2 != 0) {
3861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
3862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        digIndx += 1;       // The implicit leading zero
3863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (trailingZeroCount % 2 != 0) {
3865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // We had to consume one trailing zero for the low digit
3866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // of the least significant byte
3867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        digIndx += 1;       // The trailing zero not in the exponent
3868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        trailingZeroCount -= 1;
3869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;
3872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Subtract one off of the last byte. Really the first byte here, but it's reversed...
3874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[2] -= 1;
3875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    We want to skip over the first two slots in the buffer. The first slot
3878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
3879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
3880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    The exponent must be adjusted by the number of leading zeroes, and the number of
3881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    trailing zeroes.
3882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
3884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t exponent = (digIndx+trailingZeroCount)/2;
3885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (leadingZeroIndex)
3886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
3887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F));
3888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Now transfer the collation key to our collIterate struct.
389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The total size for our collation key is half of endIndex, rounded up.
389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t size = (endIndex+1)/2;
389250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(!ensureCEsCapacity(source, size)) {
389350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return UCOL_NULLORDER;
389450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
3895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
3896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
3897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_BYTE_COMMON; // Tertiary weight.
3898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    i = endIndex - 1; // Reset the index into the buffer.
3899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(i >= 2) {
3900b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        uint32_t primWeight = numTempBuf[i--] << 8;
3901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if ( i >= 2)
3902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            primWeight |= numTempBuf[i--];
3903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
3904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->toReturn = source->CEpos -1;
3907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return *(source->toReturn);
3908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
3910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = *(CEOffset++);
3911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
3916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t
3918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
3919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t LCount = 19;
3920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t VCount = 21;
3921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t TCount = 28;
3922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t NCount = VCount * TCount;   /* 588 */
3923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t SCount = LCount * NCount;   /* 11172 */
3924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t L = ch - SBase;
3926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
3927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                divide into pieces.
3928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                we do it in this order since some compilers can do % and / in one
3929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                operation
3930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
3931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t T = L % TCount;
3932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L /= TCount;
3933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t V = L % VCount;
3934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L /= VCount;
3935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* offset them */
3937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L += LBase;
3938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                V += VBase;
3939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                T += TBase;
3940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
394127f654740f2a26ad62a5c155af9199af9e69b889claireho                int32_t firstOffset = (int32_t)(source->pos - source->string);
394227f654740f2a26ad62a5c155af9199af9e69b889claireho                source->appendOffset(firstOffset, *status);
3943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
3945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 * return the first CE, but first put the rest into the expansion buffer
3946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 */
3947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (!source->coll->image->jamoSpecial) {
3948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
3949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
395027f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->appendOffset(firstOffset + 1, *status);
3951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
395227f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (T != TBase) {
3953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
395427f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->appendOffset(firstOffset + 1, *status);
395527f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
3956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->toReturn = source->CEpos - 1;
3958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
395927f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->offsetReturn = source->offsetStore - 1;
396027f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (source->offsetReturn == source->offsetBuffer) {
396127f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->offsetStore = source->offsetBuffer;
396227f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
396327f654740f2a26ad62a5c155af9199af9e69b889claireho
396427f654740f2a26ad62a5c155af9199af9e69b889claireho                    return *(source->toReturn);
3965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Since Hanguls pass the FCD check, it is
3967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // guaranteed that we won't be in
3968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // the normalization buffer if something like this happens
3969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Move Jamos into normalization buffer
3970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    Move the Jamos into the
3972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    normalization buffer
3973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar *tempbuffer = source->writableBuffer.getBuffer(5);
397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t tempbufferLength;
397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    tempbuffer[0] = 0;
397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    tempbuffer[1] = (UChar)L;
397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    tempbuffer[2] = (UChar)V;
3979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (T != TBase) {
398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        tempbuffer[3] = (UChar)T;
398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        tempbufferLength = 4;
3982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        tempbufferLength = 3;
3984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    source->writableBuffer.releaseBuffer(tempbufferLength);
3986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    Indicate where to continue in main input string after exhausting
3989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    the writableBuffer
3990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (source->pos  == source->string) {
3992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->fcdPosition = NULL;
3993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
3994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->fcdPosition       = source->pos-1;
3995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    source->pos               = source->writableBuffer.getTerminatedBuffer() + tempbufferLength;
3998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->origFlags         = source->flags;
3999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->flags            |= UCOL_ITER_INNORMBUF;
4000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->flags            &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
4001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return(UCOL_IGNORABLE);
4003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case IMPLICIT_TAG:        /* everything that is not defined otherwise */
4007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return getPrevImplicit(ch, source);
4008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // TODO: Remove CJK implicits as they are handled by the getImplicitPrimary function
4010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
4011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return getPrevImplicit(ch, source);
4012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case SURROGATE_TAG:  /* This is a surrogate pair */
401427f654740f2a26ad62a5c155af9199af9e69b889claireho            /* essentially an engaged lead surrogate. */
4015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* if you have encountered it here, it means that a */
4016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* broken sequence was encountered and this is an error */
401727f654740f2a26ad62a5c155af9199af9e69b889claireho            return UCOL_NOT_FOUND;
4018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case LEAD_SURROGATE_TAG:  /* D800-DBFF*/
402027f654740f2a26ad62a5c155af9199af9e69b889claireho            return UCOL_NOT_FOUND; /* broken surrogate sequence */
4021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
4023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
4024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar32 cp = 0;
4025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar  prevChar;
402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const UChar *prev;
4027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (isAtStartPrevIterate(source)) {
4028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* we are at the start of the string, wrong place to be at */
402927f654740f2a26ad62a5c155af9199af9e69b889claireho                    return UCOL_NOT_FOUND;
4030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (source->pos != source->writableBuffer.getBuffer()) {
4032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    prev     = source->pos - 1;
4033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    prev     = source->fcdPosition;
4035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                prevChar = *prev;
4037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* Handles Han and Supplementary characters here.*/
4039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (U16_IS_LEAD(prevChar)) {
4040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
4041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->pos = prev;
4042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
404327f654740f2a26ad62a5c155af9199af9e69b889claireho                    return UCOL_NOT_FOUND; /* like unassigned */
4044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return getPrevImplicit(cp, source);
4047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* UCA is filled with these. Tailorings are NOT_FOUND */
4050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* not yet implemented */
4051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CHARSET_TAG:  /* this tag always returns */
4052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* probably after 1.8 */
4053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return UCOL_NOT_FOUND;
4054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        default:           /* this tag always returns */
4056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_INTERNAL_PROGRAM_ERROR;
4057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE=0;
4058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
4059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (CE <= UCOL_NOT_FOUND) {
4062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
4063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return CE;
4067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This should really be a macro        */
4070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* However, it is used only when stack buffers are not sufficiently big, and then we're messed up performance wise */
4071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* anyway */
4072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
4073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint8_t *reallocateBuffer(uint8_t **secondaries, uint8_t *secStart, uint8_t *second, uint32_t *secSize, uint32_t newSize, UErrorCode *status) {
4074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG
4075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fprintf(stderr, ".");
4076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
4077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t *newStart = NULL;
407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t offset = (uint32_t)(*secondaries-secStart);
4079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(secStart==second) {
4081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        newStart=(uint8_t*)uprv_malloc(newSize);
4082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(newStart==NULL) {
4083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
4084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
4085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(newStart, secStart, *secondaries-secStart);
4087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
4088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        newStart=(uint8_t*)uprv_realloc(secStart, newSize);
4089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(newStart==NULL) {
4090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
4091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* Since we're reallocating, return original reference so we don't loose it. */
4092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return secStart;
4093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *secondaries=newStart+offset;
4096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *secSize=newSize;
4097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return newStart;
4098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Reverses, in place, the elements between start and end (both inclusive).
 * This is needed for continuation secondaries in French.
 *
 * TYPE   element type of the buffer
 * start  modifiable pointer to the first element to reverse
 * end    modifiable pointer to the last element to reverse
 *
 * Both pointer arguments are evaluated several times and are modified:
 * start is advanced and end is moved back until they meet or cross, so pass
 * plain pointer variables and do not rely on their values afterwards.
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by a
 * semicolon is exactly one statement and can be used safely in an unbraced
 * if/else.
 */
#define uprv_ucol_reverse_buffer(TYPE, start, end) do { \
    TYPE tempA; \
    while((start)<(end)) { \
        tempA = *(start); \
        *(start)++ = *(end); \
        *(end)-- = tempA; \
    } \
} while(0)
4123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
4125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the sortkey generation functions                           */
4126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                          */
4127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
4128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
4130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Merge two sort keys.
4131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is useful, for example, to combine sort keys from first and last names
4132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to sort such pairs.
4133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Merged sort keys consider on each collation level the first part first entirely,
4134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then the second one.
4135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is possible to merge multiple sort keys by consecutively merging
4136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * another one with the intermediate result.
4137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
4138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The length of the merge result is the sum of the lengths of the input sort keys
4139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * minus 1.
4140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
4141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src1 the first sort key
4142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src1Length the length of the first sort key, including the zero byte at the end;
4143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        can be -1 if the function is to find the length
4144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src2 the second sort key
4145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src2Length the length of the second sort key, including the zero byte at the end;
4146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        can be -1 if the function is to find the length
4147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest the buffer where the merged sort key is written,
4148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        can be NULL if destCapacity==0
4149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param destCapacity the number of bytes in the dest buffer
4150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the length of the merged sort key, src1Length+src2Length-1;
4151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
4152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *         in which cases the contents of dest is undefined
4153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
4154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @draft
4155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
4156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
4157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
4158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   const uint8_t *src2, int32_t src2Length,
4159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   uint8_t *dest, int32_t destCapacity) {
4160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t destLength;
4161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t b;
4162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* check arguments */
4164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( src1==NULL || src1Length<-2 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) ||
4165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src2==NULL || src2Length<-2 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) ||
4166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        destCapacity<0 || (destCapacity>0 && dest==NULL)
4167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
4168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* error, attempt to write a zero byte and return 0 */
4169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(dest!=NULL && destCapacity>0) {
4170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest=0;
4171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
4173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* check lengths and capacity */
4176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(src1Length<0) {
4177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
4178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(src2Length<0) {
4180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
4181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    destLength=src1Length+src2Length-1;
4184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(destLength>destCapacity) {
4185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* the merged sort key does not fit into the destination */
4186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return destLength;
4187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* merge the sort keys with the same number of levels */
4190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(*src1!=0 && *src2!=0) { /* while both have another level */
4191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* copy level from src1 not including 00 or 01 */
4192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((b=*src1)>=2) {
4193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++src1;
4194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=b;
4195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* add a 02 merge separator */
4198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *dest++=2;
4199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* copy level from src2 not including 00 or 01 */
4201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((b=*src2)>=2) {
4202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++src2;
4203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=b;
4204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* if both sort keys have another level, then add a 01 level separator and continue */
4207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(*src1==1 && *src2==1) {
4208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++src1;
4209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++src2;
4210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=1;
4211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
4215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * here, at least one sort key is finished now, but the other one
4216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * might have some contents left from containing more levels;
4217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * that contents is just appended to the result
4218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
4219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*src1!=0) {
4220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* src1 is not finished, therefore *src2==0, and src1 is appended */
4221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src2=src1;
4222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* append src2, "the other, unfinished sort key" */
4224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_strcpy((char *)dest, (const char *)src2);
4225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* trust that neither sort key contained illegally embedded zero bytes */
4227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return destLength;
4228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* sortkey API */
4231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
4232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getSortKey(const    UCollator    *coll,
4233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar        *source,
4234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t        sourceLength,
4235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint8_t        *result,
4236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t        resultLength)
4237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
4238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
4239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
4240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
4241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength));
4242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
4245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t keySize   = 0;
4246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(source != NULL) {
4248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // source == NULL is actually an error situation, but we would need to
4249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // have an error code to return it. Until we introduce a new
4250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // API, it stays like this
4251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* this uses the function pointer that is set in updateinternalstate */
4253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* currently, there are two funcs: */
4254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*ucol_calcSortKey(...);*/
4255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*ucol_calcSortKeySimpleTertiary(...);*/
4256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        keySize = coll->sortKeyGen(coll, source, sourceLength, &result, resultLength, FALSE, &status);
4258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR && result && resultLength > 0) {
4259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // That's not good. Something unusual happened.
4260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // We don't know how much we initialized before we failed.
4261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // NULL terminate for safety.
4262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // We have no way say that we have generated a partial sort key.
4263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            //result[0] = 0;
4264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            //keySize = 0;
4265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //}
4266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
4268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_EXIT_STATUS(status);
4269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return keySize;
4270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this function is called by the C++ API for sortkey generation */
4273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
4274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getSortKeyWithAllocation(const UCollator *coll,
4275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const UChar *source, int32_t sourceLength,
4276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              uint8_t **pResult,
4277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              UErrorCode *pErrorCode) {
4278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *pResult = 0;
4279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return coll->sortKeyGen(coll, source, sourceLength, pResult, 0, TRUE, pErrorCode);
4280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* Size in bytes of the local fSecsBuff array (and initial fSecsMaxLen) in ucol_getSortKeySize; */
/* judging by the names, this buffer holds French secondaries.                                  */
#define UCOL_FSEC_BUF_SIZE 256
4283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
428427f654740f2a26ad62a5c155af9199af9e69b889claireho// Is this primary weight compressible?
428527f654740f2a26ad62a5c155af9199af9e69b889claireho// Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit).
428627f654740f2a26ad62a5c155af9199af9e69b889claireho// TODO: This should use per-lead-byte flags from FractionalUCA.txt.
428727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline UBool
428827f654740f2a26ad62a5c155af9199af9e69b889clairehoisCompressible(const UCollator * /*coll*/, uint8_t primary1) {
428927f654740f2a26ad62a5c155af9199af9e69b889claireho    return UCOL_BYTE_FIRST_NON_LATIN_PRIMARY <= primary1 && primary1 <= maxRegularPrimary;
429027f654740f2a26ad62a5c155af9199af9e69b889claireho}
429127f654740f2a26ad62a5c155af9199af9e69b889claireho
4292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This function tries to get the size of a sortkey. It will be invoked if the size of resulting buffer is 0  */
4293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* or if we run out of space while making a sortkey and want to return ASAP                                   */
4294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) {
4295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
4296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
4297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareSec   = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
4298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareTer   = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
4299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareQuad  = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
4300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  compareIdent = (strength == UCOL_IDENTICAL);
4301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  doCase = (coll->caseLevel == UCOL_ON);
4302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  shifted = (coll->alternateHandling == UCOL_SHIFTED);
4303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UBool  qShifted = shifted  && (compareQuad == 0);
4304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
4305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0);
4306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t fSecsBuff[UCOL_FSEC_BUF_SIZE];
4307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *fSecs = fSecsBuff;
4308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t fSecsLen = 0, fSecsMaxLen = UCOL_FSEC_BUF_SIZE;
4309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *frenchStartPtr = NULL, *frenchEndPtr = NULL;
4310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t variableTopValue = coll->variableTopValue;
4312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
4313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(doHiragana) {
4314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCOL_COMMON_BOT4++;
4315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* allocate one more space for hiragana */
4316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);
4318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t order = UCOL_NO_MORE_CES;
4320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary1 = 0;
4321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary2 = 0;
4322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t secondary = 0;
4323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiary = 0;
4324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t caseShift = 0;
4325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t c2 = 0, c3 = 0, c4 = 0; /* variables for compression */
4326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseSwitch = coll->caseSwitch;
4328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryMask = coll->tertiaryMask;
4329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryCommon = coll->tertiaryCommon;
4330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool wasShifted = FALSE;
4332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool notIsContinuation = FALSE;
4333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t leadPrimary = 0;
4334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(;;) {
4337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        order = ucol_IGetNextCE(coll, s, &status);
4338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(order == UCOL_NO_MORE_CES) {
4339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
4340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(order == 0) {
4343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        notIsContinuation = !isContinuation(order);
4347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(notIsContinuation) {
4350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tertiary = (uint8_t)((order & UCOL_BYTE_SIZE_MASK));
4351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
4352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
4353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
4355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
4356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        primary1 = (uint8_t)(order >> 8);
4357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
435827f654740f2a26ad62a5c155af9199af9e69b889claireho        /* no need to permute since the actual code values don't matter
435927f654740f2a26ad62a5c155af9199af9e69b889claireho        if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
436027f654740f2a26ad62a5c155af9199af9e69b889claireho            primary1 = coll->leadBytePermutationTable[primary1];
436127f654740f2a26ad62a5c155af9199af9e69b889claireho        }
436227f654740f2a26ad62a5c155af9199af9e69b889claireho        */
4363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
436427f654740f2a26ad62a5c155af9199af9e69b889claireho        if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
436527f654740f2a26ad62a5c155af9199af9e69b889claireho                      || (!notIsContinuation && wasShifted)))
4366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            || (wasShifted && primary1 == 0)) { /* amendment to the UCA says that primary ignorables */
4367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* and other ignorables should be removed if following a shifted code point */
4368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */
4369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* we should just completely ignore it */
4370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
4371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(compareQuad == 0) {
4373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(c4 > 0) {
4374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        currentSize += (c2/UCOL_BOT_COUNT4)+1;
4375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        c4 = 0;
4376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    currentSize++;
4378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(primary2 != 0) {
4379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        currentSize++;
4380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                wasShifted = TRUE;
4383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
4384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            wasShifted = FALSE;
4385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
438627f654740f2a26ad62a5c155af9199af9e69b889claireho            /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will   */
4387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* calculate sortkey size */
4388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(primary1 != UCOL_IGNORABLE) {
4389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(notIsContinuation) {
4390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(leadPrimary == primary1) {
4391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        currentSize++;
4392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
4393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(leadPrimary != 0) {
4394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            currentSize++;
4395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(primary2 == UCOL_IGNORABLE) {
4397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* one byter, not compressed */
4398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            currentSize++;
4399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            leadPrimary = 0;
440027f654740f2a26ad62a5c155af9199af9e69b889claireho                        } else if(isCompressible(coll, primary1)) {
440127f654740f2a26ad62a5c155af9199af9e69b889claireho                            /* compress */
4402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            leadPrimary = primary1;
4403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            currentSize+=2;
440427f654740f2a26ad62a5c155af9199af9e69b889claireho                        } else {
440527f654740f2a26ad62a5c155af9199af9e69b889claireho                            leadPrimary = 0;
440627f654740f2a26ad62a5c155af9199af9e69b889claireho                            currentSize+=2;
4407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
4410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    currentSize++;
4411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(primary2 != UCOL_IGNORABLE) {
4412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        currentSize++;
4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(secondary > compareSec) { /* I think that != 0 test should be != IGNORABLE */
4418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isFrenchSec){
4419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (secondary == UCOL_COMMON2 && notIsContinuation) {
4420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        c2++;
4421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
4422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(c2 > 0) {
4423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
4424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                currentSize += (c2/(uint32_t)UCOL_TOP_COUNT2)+1;
4425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
4426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+1;
4427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            c2 = 0;
4429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        currentSize++;
4431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
4433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fSecs[fSecsLen++] = secondary;
4434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(fSecsLen == fSecsMaxLen) {
4435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        uint8_t *fSecsTemp;
4436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(fSecs == fSecsBuff) {
4437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            fSecsTemp = (uint8_t *)uprv_malloc(2*fSecsLen);
4438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
4439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            fSecsTemp = (uint8_t *)uprv_realloc(fSecs, 2*fSecsLen);
4440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(fSecsTemp == NULL) {
4442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            status = U_MEMORY_ALLOCATION_ERROR;
4443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return 0;
4444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fSecs = fSecsTemp;
4446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fSecsMaxLen *= 2;
4447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(notIsContinuation) {
4449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (frenchStartPtr != NULL) {
4450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* reverse secondaries from frenchStartPtr up to frenchEndPtr */
4451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
4452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            frenchStartPtr = NULL;
4453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
4455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (frenchStartPtr == NULL) {
4456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            frenchStartPtr = fSecs+fSecsLen-2;
4457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        frenchEndPtr = fSecs+fSecsLen-1;
4459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) {
4464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // do the case level if we need to do it. We don't want to calculate
4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // case level for primary ignorables if we have only primary strength and case level
4466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // otherwise we would break well formedness of CEs
4467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (caseShift  == 0) {
4468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    currentSize++;
4469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    caseShift = UCOL_CASE_SHIFT_START;
4470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if((tertiary&0x3F) > 0 && notIsContinuation) {
4472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    caseShift--;
4473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((tertiary &0xC0) != 0) {
4474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (caseShift  == 0) {
4475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            currentSize++;
4476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            caseShift = UCOL_CASE_SHIFT_START;
4477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        caseShift--;
4479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
4482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(notIsContinuation) {
4483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tertiary ^= caseSwitch;
4484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tertiary &= tertiaryMask;
4488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(tertiary > compareTer) { /* I think that != 0 test should be != IGNORABLE */
4489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (tertiary == tertiaryCommon && notIsContinuation) {
4490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    c3++;
4491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(c3 > 0) {
4493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if((tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL)
4494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            || (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST)) {
4495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                currentSize += (c3/(uint32_t)coll->tertiaryTopCount)+1;
4496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
4497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            currentSize += (c3/(uint32_t)coll->tertiaryBottomCount)+1;
4498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        c3 = 0;
4500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    currentSize++;
4502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(/*qShifted*/(compareQuad==0)  && notIsContinuation) {
4506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(s->flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
4507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(c4>0) { // Close this part
4508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        currentSize += (c4/UCOL_BOT_COUNT4)+1;
4509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        c4 = 0;
4510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    currentSize++; // Add the Hiragana
4512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { // This wasn't Hiragana, so we can continue adding stuff
4513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    c4++;
4514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!isFrenchSec){
4520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(c2 > 0) {
4521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0);
4522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
4524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uint32_t i = 0;
4525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(frenchStartPtr != NULL) {
4526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
4527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(i = 0; i<fSecsLen; i++) {
4529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            secondary = *(fSecs+fSecsLen-i-1);
4530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* This is compression code. */
4531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (secondary == UCOL_COMMON2) {
4532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                ++c2;
4533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
4534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(c2 > 0) {
4535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
4536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        currentSize += (c2/(uint32_t)UCOL_TOP_COUNT2)+((c2%(uint32_t)UCOL_TOP_COUNT2 != 0)?1:0);
4537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
4538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0);
4539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    c2 = 0;
4541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                currentSize++;
4543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(c2 > 0) {
4546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0);
4547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(fSecs != fSecsBuff) {
4549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_free(fSecs);
4550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(c3 > 0) {
4554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        currentSize += (c3/(uint32_t)coll->tertiaryBottomCount) + ((c3%(uint32_t)coll->tertiaryBottomCount != 0)?1:0);
4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(c4 > 0  && compareQuad == 0) {
4558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        currentSize += (c4/(uint32_t)UCOL_BOT_COUNT4)+((c4%(uint32_t)UCOL_BOT_COUNT4 != 0)?1:0);
4559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(compareIdent) {
4562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        currentSize += u_lengthOfIdenticalLevelRun(s->string, len);
4563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return currentSize;
4565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
4568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void doCaseShift(uint8_t **cases, uint32_t &caseShift) {
4569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (caseShift  == 0) {
4570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *(*cases)++ = UCOL_CASE_BYTE_START;
4571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        caseShift = UCOL_CASE_SHIFT_START;
4572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Counts one more requested byte in `size` and, only while the write position is still
// below `limit`, stores `value` there and advances the position. Since `size` grows on every
// call, it reflects how many bytes were wanted, even when some of them were not stored.
static
inline void addWithIncrement(uint8_t *&primaries, uint8_t *limit, uint32_t &size, const uint8_t value) {
    ++size;
    if(primaries >= limit) {
        return;
    }
    *primaries = value;
    ++primaries;
}
4584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Packs the secondary buffer when processing French locale. Adds the terminator.
4586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
4587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint8_t *packFrench(uint8_t *primaries, uint8_t *primEnd, uint8_t *secondaries, uint32_t *secsize, uint8_t *frenchStartPtr, uint8_t *frenchEndPtr) {
4588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t secondary;
4589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t count2 = 0;
4590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t i = 0, size = 0;
4591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // we use i here since the key size already accounts for terminators, so we'll discard the increment
4592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    addWithIncrement(primaries, primEnd, i, UCOL_LEVELTERMINATOR);
4593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* If there are any unresolved continuation secondaries, reverse them here so that we can reverse the whole secondary thing */
4594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(frenchStartPtr != NULL) {
4595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
4596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for(i = 0; i<*secsize; i++) {
4598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secondary = *(secondaries-i-1);
4599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* This is compression code. */
4600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (secondary == UCOL_COMMON2) {
4601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            ++count2;
4602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
4603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (count2 > 0) {
4604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
4605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count2 > UCOL_TOP_COUNT2) {
4606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
4607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count2 -= (uint32_t)UCOL_TOP_COUNT2;
4608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
4610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count2 > UCOL_BOT_COUNT2) {
4612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
4613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count2 -= (uint32_t)UCOL_BOT_COUNT2;
4614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
4616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                count2 = 0;
4618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            addWithIncrement(primaries, primEnd, size, secondary);
4620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (count2 > 0) {
4623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while (count2 > UCOL_BOT_COUNT2) {
4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count2 -= (uint32_t)UCOL_BOT_COUNT2;
4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
4628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *secsize = size;
4630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return primaries;
4631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// NOTE(review): judging by the name, presumably the size value ucol_calcSortKey reports when it
// fails; no use of this macro is visible in this part of the file -- confirm against its uses.
4633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0
4634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the sortkey work horse function */
4636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t U_CALLCONV
4637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_calcSortKey(const    UCollator    *coll,
4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar        *source,
4639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t        sourceLength,
4640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint8_t        **result,
4641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t        resultLength,
4642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool allocateSKBuffer,
4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode *status)
4644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
4646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t i = 0; /* general purpose counter */
4648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Stack allocated buffers for buffers we use */
4650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t prim[UCOL_PRIMARY_MAX_BUFFER], second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER], caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BUFFER];
4651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *primaries = *result, *secondaries = second, *tertiaries = tert, *cases = caseB, *quads = quad;
4653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status)) {
4655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
4656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(primaries == NULL && allocateSKBuffer == TRUE) {
4659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        primaries = *result = prim;
4660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        resultLength = UCOL_PRIMARY_MAX_BUFFER;
4661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t secSize = UCOL_SECONDARY_MAX_BUFFER, terSize = UCOL_TERTIARY_MAX_BUFFER,
4664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      caseSize = UCOL_CASE_MAX_BUFFER, quadSize = UCOL_QUAD_MAX_BUFFER;
4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sortKeySize = 1; /* it is always \0 terminated */
4667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
466850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString normSource;
4669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength);
4671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UColAttributeValue strength = coll->strength;
4673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareSec   = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
4675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareTer   = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
4676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareQuad  = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
4677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  compareIdent = (strength == UCOL_IDENTICAL);
4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  doCase = (coll->caseLevel == UCOL_ON);
4679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0);
4680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  shifted = (coll->alternateHandling == UCOL_SHIFTED);
4681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UBool  qShifted = shifted && (compareQuad == 0);
4682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
4683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t variableTopValue = coll->variableTopValue;
4685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no
4686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // qShifted, we don't need to set UCOL_COMMON_BOT4 so high.
4687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
4688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_HIRAGANA_QUAD = 0;
4689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(doHiragana) {
4690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++;
4691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* allocate one more space for hiragana, value for hiragana */
4692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);
4694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* support for special features like caselevel and funky secondaries */
4696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *frenchStartPtr = NULL;
4697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *frenchEndPtr = NULL;
4698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t caseShift = 0;
4699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + /*(qShifted?1:0)*/(compareQuad?0:1) + (compareIdent?1:0));
4701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* If we need to normalize, we'll do it all at once at the beginning! */
470350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const Normalizer2 *norm2;
4704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(compareIdent) {
470550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm2 = Normalizer2Factory::getNFDInstance(*status);
4706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(coll->normalizationMode != UCOL_OFF) {
470750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm2 = Normalizer2Factory::getFCDInstance(*status);
4708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
470950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm2 = NULL;
471050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
471150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(norm2 != NULL) {
471250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        normSource.setTo(FALSE, source, len);
471350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);
471450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(qcYesLength != len) {
471550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeString unnormalized = normSource.tempSubString(qcYesLength);
471650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            normSource.truncate(qcYesLength);
471750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);
471850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            source = normSource.getBuffer();
471950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len = normSource.length();
4720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collIterate s;
472350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, source, len, &s, status);
472450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
472550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
472650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
472727f654740f2a26ad62a5c155af9199af9e69b889claireho    s.flags &= ~UCOL_ITER_NORM;  // source passed the FCD test or else was normalized.
4728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(resultLength == 0 || primaries == NULL) {
473050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);
4731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *primarySafeEnd = primaries + resultLength - 1;
4733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(strength > UCOL_PRIMARY) {
4734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        primarySafeEnd--;
4735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t minBufferSize = UCOL_MAX_BUFFER;
4738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *primStart = primaries;
4740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *secStart = secondaries;
4741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *terStart = tertiaries;
4742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *caseStart = cases;
4743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *quadStart = quads;
4744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t order = 0;
4746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary1 = 0;
4748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary2 = 0;
4749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t secondary = 0;
4750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiary = 0;
4751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseSwitch = coll->caseSwitch;
4752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryMask = coll->tertiaryMask;
4753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int8_t tertiaryAddition = coll->tertiaryAddition;
4754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryTop = coll->tertiaryTop;
4755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryBottom = coll->tertiaryBottom;
4756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryCommon = coll->tertiaryCommon;
4757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseBits = 0;
4758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool finished = FALSE;
4760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool wasShifted = FALSE;
4761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool notIsContinuation = FALSE;
4762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t prevBuffSize = 0;
4764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t count2 = 0, count3 = 0, count4 = 0;
4766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t leadPrimary = 0;
4767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(;;) {
4769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(i=prevBuffSize; i<minBufferSize; ++i) {
4770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            order = ucol_IGetNextCE(coll, &s, status);
4772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(order == UCOL_NO_MORE_CES) {
4773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                finished = TRUE;
4774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
4775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(order == 0) {
4778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                continue;
4779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            notIsContinuation = !isContinuation(order);
4782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(notIsContinuation) {
4784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK);
4785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
4786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
4787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
4790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
4791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            primary1 = (uint8_t)(order >> 8);
4792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
479327f654740f2a26ad62a5c155af9199af9e69b889claireho            uint8_t originalPrimary1 = primary1;
479427f654740f2a26ad62a5c155af9199af9e69b889claireho            if(notIsContinuation && coll->leadBytePermutationTable != NULL) {
479527f654740f2a26ad62a5c155af9199af9e69b889claireho                primary1 = coll->leadBytePermutationTable[primary1];
479627f654740f2a26ad62a5c155af9199af9e69b889claireho            }
4797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
479827f654740f2a26ad62a5c155af9199af9e69b889claireho            if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
479927f654740f2a26ad62a5c155af9199af9e69b889claireho                           || (!notIsContinuation && wasShifted)))
4800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                || (wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */
4801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
4802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* and other ignorables should be removed if following a shifted code point */
4803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */
4804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* we should just completely ignore it */
4805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
4806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(compareQuad == 0) {
4808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(count4 > 0) {
4809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        while (count4 > UCOL_BOT_COUNT4) {
4810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
4811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            count4 -= UCOL_BOT_COUNT4;
4812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));
4814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count4 = 0;
4815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* We are dealing with a variable and we're treating them as shifted */
4817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* This is a shifted ignorable */
4818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(primary1 != 0) { /* we need to check this since we could be in continuation */
4819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *quads++ = primary1;
4820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(primary2 != 0) {
4822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *quads++ = primary2;
4823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                wasShifted = TRUE;
4826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
4827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                wasShifted = FALSE;
4828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
482927f654740f2a26ad62a5c155af9199af9e69b889claireho                /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will be UCOL_IGNORABLE (a one-byte primary). */
4830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* regular and simple sortkey calc */
4831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(primary1 != UCOL_IGNORABLE) {
4832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(notIsContinuation) {
4833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(leadPrimary == primary1) {
4834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *primaries++ = primary2;
4835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
4836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(leadPrimary != 0) {
4837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *primaries++ = (uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);
4838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(primary2 == UCOL_IGNORABLE) {
4840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                /* one byter, not compressed */
4841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *primaries++ = primary1;
4842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                leadPrimary = 0;
484327f654740f2a26ad62a5c155af9199af9e69b889claireho                            } else if(isCompressible(coll, originalPrimary1)) {
484427f654740f2a26ad62a5c155af9199af9e69b889claireho                                /* compress */
4845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *primaries++ = leadPrimary = primary1;
4846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if(primaries <= primarySafeEnd) {
4847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *primaries++ = primary2;
4848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
484927f654740f2a26ad62a5c155af9199af9e69b889claireho                            } else {
485027f654740f2a26ad62a5c155af9199af9e69b889claireho                                leadPrimary = 0;
485127f654740f2a26ad62a5c155af9199af9e69b889claireho                                *primaries++ = primary1;
485227f654740f2a26ad62a5c155af9199af9e69b889claireho                                if(primaries <= primarySafeEnd) {
485327f654740f2a26ad62a5c155af9199af9e69b889claireho                                    *primaries++ = primary2;
485427f654740f2a26ad62a5c155af9199af9e69b889claireho                                }
4855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
4858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *primaries++ = primary1;
4859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if((primary2 != UCOL_IGNORABLE) && (primaries <= primarySafeEnd)) {
4860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *primaries++ = primary2; /* second part */
4861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(secondary > compareSec) {
4866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isFrenchSec) {
4867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* This is compression code. */
4868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (secondary == UCOL_COMMON2 && notIsContinuation) {
4869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            ++count2;
4870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
4871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (count2 > 0) {
4872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
4873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    while (count2 > UCOL_TOP_COUNT2) {
4874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
4875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        count2 -= (uint32_t)UCOL_TOP_COUNT2;
4876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
4877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - (count2-1));
4878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
4879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    while (count2 > UCOL_BOT_COUNT2) {
4880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
4881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        count2 -= (uint32_t)UCOL_BOT_COUNT2;
4882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
4883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));
4884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
4885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                count2 = 0;
4886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *secondaries++ = secondary;
4888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
4890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *secondaries++ = secondary;
4891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* Do the special handling for French secondaries */
4892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* We need to get continuation elements and do intermediate restore */
4893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* abc1c2c3de with french secondaries need to be edc1c2c3ba NOT edc3c2c1ba */
4894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(notIsContinuation) {
4895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (frenchStartPtr != NULL) {
4896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                /* reverse secondaries from frenchStartPtr up to frenchEndPtr */
4897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
4898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                frenchStartPtr = NULL;
4899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
4901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (frenchStartPtr == NULL) {
4902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                frenchStartPtr = secondaries - 2;
4903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            frenchEndPtr = secondaries-1;
4905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) {
4910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // do the case level if we need to do it. We don't want to calculate
4911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // case level for primary ignorables if we have only primary strength and case level
4912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // otherwise we would break well formedness of CEs
4913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    doCaseShift(&cases, caseShift);
4914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(notIsContinuation) {
4915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        caseBits = (uint8_t)(tertiary & 0xC0);
4916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(tertiary != 0) {
4918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(coll->caseFirst == UCOL_UPPER_FIRST) {
4919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if((caseBits & 0xC0) == 0) {
4920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *(cases-1) |= 1 << (--caseShift);
4921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
4922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *(cases-1) |= 0 << (--caseShift);
4923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    /* second bit */
4924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    doCaseShift(&cases, caseShift);
4925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *(cases-1) |= ((caseBits>>6)&1) << (--caseShift);
4926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
4927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
4928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if((caseBits & 0xC0) == 0) {
4929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *(cases-1) |= 0 << (--caseShift);
4930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
4931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *(cases-1) |= 1 << (--caseShift);
4932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    /* second bit */
4933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    doCaseShift(&cases, caseShift);
4934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *(cases-1) |= ((caseBits>>7)&1) << (--caseShift);
4935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
4936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(notIsContinuation) {
4942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        tertiary ^= caseSwitch;
4943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tertiary &= tertiaryMask;
4947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tertiary > compareTer) {
4948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* This is compression code. */
4949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* sequence size check is included in the if clause */
4950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (tertiary == tertiaryCommon && notIsContinuation) {
4951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        ++count3;
4952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
4953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
4954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tertiary += tertiaryAddition;
4955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if(tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
4956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tertiary -= tertiaryAddition;
4957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (count3 > 0) {
4959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if ((tertiary > tertiaryCommon)) {
4960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                while (count3 > coll->tertiaryTopCount) {
4961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount);
4962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    count3 -= (uint32_t)coll->tertiaryTopCount;
4963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
4964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *tertiaries++ = (uint8_t)(tertiaryTop - (count3-1));
4965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
4966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                while (count3 > coll->tertiaryBottomCount) {
4967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount);
4968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    count3 -= (uint32_t)coll->tertiaryBottomCount;
4969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
4970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));
4971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            count3 = 0;
4973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *tertiaries++ = tertiary;
4975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(/*qShifted*/(compareQuad==0)  && notIsContinuation) {
4979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
4980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(count4>0) { // Close this part
4981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while (count4 > UCOL_BOT_COUNT4) {
4982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
4983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                count4 -= UCOL_BOT_COUNT4;
4984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));
4986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            count4 = 0;
4987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *quads++ = UCOL_HIRAGANA_QUAD; // Add the Hiragana
4989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else { // This wasn't Hiragana, so we can continue adding stuff
4990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count4++;
4991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(primaries > primarySafeEnd) { /* We have stepped over the primary buffer */
4996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(allocateSKBuffer == FALSE) { /* need to save our butts if we cannot reallocate */
499750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    IInit_collIterate(coll, (UChar *)source, len, &s, status);
499850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(U_FAILURE(*status)) {
499950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
500050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        finished = TRUE;
500150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
500250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
500327f654740f2a26ad62a5c155af9199af9e69b889claireho                    s.flags &= ~UCOL_ITER_NORM;
5004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);
5005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *status = U_BUFFER_OVERFLOW_ERROR;
5006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    finished = TRUE;
5007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
5008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* It's much nicer if we can actually reallocate */
500950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t sks = sortKeySize+(int32_t)((primaries - primStart)+(secondaries - secStart)+(tertiaries - terStart)+(cases-caseStart)+(quads-quadStart));
5010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sks, status);
5011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(U_SUCCESS(*status)) {
5012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *result = primStart;
5013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        primarySafeEnd = primStart + resultLength - 1;
5014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(strength > UCOL_PRIMARY) {
5015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            primarySafeEnd--;
5016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* We ran out of memory!? We can't recover. */
5019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        finished = TRUE;
5021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
5022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(finished) {
5027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
5029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            prevBuffSize = minBufferSize;
5030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t frenchStartOffset = 0, frenchEndOffset = 0;
5032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (frenchStartPtr != NULL) {
503350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                frenchStartOffset = (uint32_t)(frenchStartPtr - secStart);
503450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                frenchEndOffset = (uint32_t)(frenchEndPtr - secStart);
5035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            secStart = reallocateBuffer(&secondaries, secStart, second, &secSize, 2*secSize, status);
5037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            terStart = reallocateBuffer(&tertiaries, terStart, tert, &terSize, 2*terSize, status);
5038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            caseStart = reallocateBuffer(&cases, caseStart, caseB, &caseSize, 2*caseSize, status);
5039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            quadStart = reallocateBuffer(&quads, quadStart, quad, &quadSize, 2*quadSize, status);
5040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(U_FAILURE(*status)) {
5041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* We ran out of memory!? We can't recover. */
5042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
5044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (frenchStartPtr != NULL) {
5046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                frenchStartPtr = secStart + frenchStartOffset;
5047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                frenchEndPtr = secStart + frenchEndOffset;
5048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            minBufferSize *= 2;
5050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Here, we are generally done with processing */
5054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* bailing out would not be too productive */
5055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(*status)) {
505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sortKeySize += (uint32_t)(primaries - primStart);
5058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* we have done all the CE's, now let's put them together to form a key */
5059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(compareSec == 0) {
5060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (count2 > 0) {
5061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (count2 > UCOL_BOT_COUNT2) {
5062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
5063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    count2 -= (uint32_t)UCOL_BOT_COUNT2;
5064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));
5066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
506750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint32_t secsize = (uint32_t)(secondaries-secStart);
5068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(!isFrenchSec) { // Regular situation, we know the length of secondaries
5069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sortKeySize += secsize;
5070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sortKeySize <= resultLength) {
5071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(primaries++) = UCOL_LEVELTERMINATOR;
5072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uprv_memcpy(primaries, secStart, secsize);
5073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    primaries += secsize;
5074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(allocateSKBuffer == TRUE) { /* need to save our butts if we cannot reallocate */
5076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
5077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(U_SUCCESS(*status)) {
5078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *result = primStart;
5079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *(primaries++) = UCOL_LEVELTERMINATOR;
5080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uprv_memcpy(primaries, secStart, secsize);
5081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            primaries += secsize;
5082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        else {
5084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* We ran out of memory!? We can't recover. */
5085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto cleanup;
5087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_BUFFER_OVERFLOW_ERROR;
5090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else { // French secondary is on. We will need to pack French. packFrench will add the level terminator
5093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint8_t *newPrim = packFrench(primaries, primStart+resultLength, secondaries, &secsize, frenchStartPtr, frenchEndPtr);
5094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sortKeySize += secsize;
5095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sortKeySize <= resultLength) { // if we managed to pack fine
5096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    primaries = newPrim; // update the primary pointer
5097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { // overflow, need to reallocate and redo
5098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(allocateSKBuffer == TRUE) { /* need to save our butts if we cannot reallocate */
5099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
5100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(U_SUCCESS(*status)) {
5101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            primaries = packFrench(primaries, primStart+resultLength, secondaries, &secsize, frenchStartPtr, frenchEndPtr);
5102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        else {
5104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* We ran out of memory!? We can't recover. */
5105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto cleanup;
5107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_BUFFER_OVERFLOW_ERROR;
5110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(doCase) {
511650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint32_t casesize = (uint32_t)(cases - caseStart);
5117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sortKeySize += casesize;
5118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sortKeySize <= resultLength) {
5119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *(primaries++) = UCOL_LEVELTERMINATOR;
5120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uprv_memcpy(primaries, caseStart, casesize);
5121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                primaries += casesize;
5122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
5123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(allocateSKBuffer == TRUE) {
5124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
5125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(U_SUCCESS(*status)) {
5126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *result = primStart;
5127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(primaries++) = UCOL_LEVELTERMINATOR;
5128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        uprv_memcpy(primaries, caseStart, casesize);
5129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else {
5131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* We ran out of memory!? We can't recover. */
5132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto cleanup;
5134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *status = U_BUFFER_OVERFLOW_ERROR;
5137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(compareTer == 0) {
5142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (count3 > 0) {
5143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (coll->tertiaryCommon != UCOL_COMMON_BOT3) {
5144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count3 >= coll->tertiaryTopCount) {
5145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount);
5146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count3 -= (uint32_t)coll->tertiaryTopCount;
5147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *tertiaries++ = (uint8_t)(tertiaryTop - count3);
5149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count3 > coll->tertiaryBottomCount) {
5151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount);
5152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count3 -= (uint32_t)coll->tertiaryBottomCount;
5153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));
5155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
515750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint32_t tersize = (uint32_t)(tertiaries - terStart);
5158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sortKeySize += tersize;
5159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sortKeySize <= resultLength) {
5160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(primaries++) = UCOL_LEVELTERMINATOR;
5161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uprv_memcpy(primaries, terStart, tersize);
5162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                primaries += tersize;
5163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
5164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(allocateSKBuffer == TRUE) {
5165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
5166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(U_SUCCESS(*status)) {
5167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *result = primStart;
5168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(primaries++) = UCOL_LEVELTERMINATOR;
5169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        uprv_memcpy(primaries, terStart, tersize);
5170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else {
5172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* We ran out of memory!? We can't recover. */
5173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto cleanup;
5175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *status = U_BUFFER_OVERFLOW_ERROR;
5178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(compareQuad == 0/*qShifted == TRUE*/) {
5182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(count4 > 0) {
5183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count4 > UCOL_BOT_COUNT4) {
5184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
5185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count4 -= UCOL_BOT_COUNT4;
5186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));
5188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
518950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                uint32_t quadsize = (uint32_t)(quads - quadStart);
5190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sortKeySize += quadsize;
5191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sortKeySize <= resultLength) {
5192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(primaries++) = UCOL_LEVELTERMINATOR;
5193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uprv_memcpy(primaries, quadStart, quadsize);
5194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    primaries += quadsize;
5195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(allocateSKBuffer == TRUE) {
5197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
5198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(U_SUCCESS(*status)) {
5199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *result = primStart;
5200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *(primaries++) = UCOL_LEVELTERMINATOR;
5201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uprv_memcpy(primaries, quadStart, quadsize);
5202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        else {
5204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* We ran out of memory!? We can't recover. */
5205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto cleanup;
5207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_BUFFER_OVERFLOW_ERROR;
5210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(compareIdent) {
5215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sortKeySize += u_lengthOfIdenticalLevelRun(s.string, len);
5216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sortKeySize <= resultLength) {
5217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(primaries++) = UCOL_LEVELTERMINATOR;
5218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    primaries += u_writeIdenticalLevelRun(s.string, len, primaries);
5219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(allocateSKBuffer == TRUE) {
5221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, sortKeySize, status);
5222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(U_SUCCESS(*status)) {
5223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *result = primStart;
5224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *(primaries++) = UCOL_LEVELTERMINATOR;
5225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            u_writeIdenticalLevelRun(s.string, len, primaries);
5226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        else {
5228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* We ran out of memory!? We can't recover. */
5229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto cleanup;
5231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_BUFFER_OVERFLOW_ERROR;
5234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *(primaries++) = '\0';
5239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(allocateSKBuffer == TRUE) {
5242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *result = (uint8_t*)uprv_malloc(sortKeySize);
5243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* test for NULL */
5244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (*result == NULL) {
5245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
5246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            goto cleanup;
5247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(*result, primStart, sortKeySize);
5249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(primStart != prim) {
5250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_free(primStart);
5251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querucleanup:
5255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (allocateSKBuffer == FALSE && resultLength > 0 && U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
5256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* NULL terminate for safety */
5257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        **result = 0;
5258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(terStart != tert) {
5260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(terStart);
5261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(secStart);
5262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(caseStart);
5263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(quadStart);
5264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* To avoid memory leak, free the offset buffer if necessary. */
5267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucol_freeOffsetBuffer(&s);
5268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return sortKeySize;
5270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
5271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t U_CALLCONV
5274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
5275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar        *source,
5276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t        sourceLength,
5277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint8_t        **result,
5278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t        resultLength,
5279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool allocateSKBuffer,
5280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode *status)
5281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
5282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ALIGN_CODE(16);
5283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
5285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t i = 0; /* general purpose counter */
5286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Stack allocated buffers for buffers we use */
5288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t prim[UCOL_PRIMARY_MAX_BUFFER], second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER];
5289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *primaries = *result, *secondaries = second, *tertiaries = tert;
5291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status)) {
5293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
5294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(primaries == NULL && allocateSKBuffer == TRUE) {
5297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        primaries = *result = prim;
5298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        resultLength = UCOL_PRIMARY_MAX_BUFFER;
5299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t secSize = UCOL_SECONDARY_MAX_BUFFER, terSize = UCOL_TERTIARY_MAX_BUFFER;
5302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sortKeySize = 3; /* it is always \0 terminated plus separators for secondary and tertiary */
5304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
530550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString normSource;
5306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t len =  sourceLength;
5308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* If we need to normalize, we'll do it all at once at the beginning! */
531050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(coll->normalizationMode != UCOL_OFF) {
531150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        normSource.setTo(len < 0, source, len);
531250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const Normalizer2 *norm2 = Normalizer2Factory::getFCDInstance(*status);
531350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);
531450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(qcYesLength != normSource.length()) {
531550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeString unnormalized = normSource.tempSubString(qcYesLength);
531650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            normSource.truncate(qcYesLength);
531750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);
531850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            source = normSource.getBuffer();
531950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len = normSource.length();
5320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collIterate s;
532350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, (UChar *)source, len, &s, status);
532450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
532550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
532650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
532727f654740f2a26ad62a5c155af9199af9e69b889claireho    s.flags &= ~UCOL_ITER_NORM;  // source passed the FCD test or else was normalized.
5328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(resultLength == 0 || primaries == NULL) {
533050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);
5331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *primarySafeEnd = primaries + resultLength - 2;
5334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t minBufferSize = UCOL_MAX_BUFFER;
5336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *primStart = primaries;
5338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *secStart = secondaries;
5339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *terStart = tertiaries;
5340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t order = 0;
5342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary1 = 0;
5344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary2 = 0;
5345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t secondary = 0;
5346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiary = 0;
5347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseSwitch = coll->caseSwitch;
5348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryMask = coll->tertiaryMask;
5349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int8_t tertiaryAddition = coll->tertiaryAddition;
5350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryTop = coll->tertiaryTop;
5351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryBottom = coll->tertiaryBottom;
5352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryCommon = coll->tertiaryCommon;
5353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t prevBuffSize = 0;
5355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool finished = FALSE;
5357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool notIsContinuation = FALSE;
5358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t count2 = 0, count3 = 0;
5360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t leadPrimary = 0;
5361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(;;) {
5363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(i=prevBuffSize; i<minBufferSize; ++i) {
5364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            order = ucol_IGetNextCE(coll, &s, status);
5366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(order == 0) {
5368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                continue;
5369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(order == UCOL_NO_MORE_CES) {
5372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                finished = TRUE;
5373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
5374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            notIsContinuation = !isContinuation(order);
5377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(notIsContinuation) {
5379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tertiary = (uint8_t)((order & tertiaryMask));
5380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
5381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
5382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
538327f654740f2a26ad62a5c155af9199af9e69b889claireho
5384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
5385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
5386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            primary1 = (uint8_t)(order >> 8);
5387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
538827f654740f2a26ad62a5c155af9199af9e69b889claireho            uint8_t originalPrimary1 = primary1;
538927f654740f2a26ad62a5c155af9199af9e69b889claireho            if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
539027f654740f2a26ad62a5c155af9199af9e69b889claireho                primary1 = coll->leadBytePermutationTable[primary1];
539127f654740f2a26ad62a5c155af9199af9e69b889claireho            }
539227f654740f2a26ad62a5c155af9199af9e69b889claireho
5393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
539427f654740f2a26ad62a5c155af9199af9e69b889claireho            /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will   */
5395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above.               */
5396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* regular and simple sortkey calc */
5397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(primary1 != UCOL_IGNORABLE) {
5398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(notIsContinuation) {
5399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(leadPrimary == primary1) {
5400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *primaries++ = primary2;
5401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(leadPrimary != 0) {
5403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *primaries++ = (uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);
5404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(primary2 == UCOL_IGNORABLE) {
5406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* one byter, not compressed */
5407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *primaries++ = primary1;
5408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            leadPrimary = 0;
540927f654740f2a26ad62a5c155af9199af9e69b889claireho                        } else if(isCompressible(coll, originalPrimary1)) {
541027f654740f2a26ad62a5c155af9199af9e69b889claireho                            /* compress */
5411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *primaries++ = leadPrimary = primary1;
5412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *primaries++ = primary2;
541327f654740f2a26ad62a5c155af9199af9e69b889claireho                        } else {
541427f654740f2a26ad62a5c155af9199af9e69b889claireho                            leadPrimary = 0;
541527f654740f2a26ad62a5c155af9199af9e69b889claireho                            *primaries++ = primary1;
541627f654740f2a26ad62a5c155af9199af9e69b889claireho                            *primaries++ = primary2;
5417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
5420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *primaries++ = primary1;
5421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(primary2 != UCOL_IGNORABLE) {
5422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *primaries++ = primary2; /* second part */
5423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(secondary > 0) { /* I think that != 0 test should be != IGNORABLE */
5428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* This is compression code. */
5429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (secondary == UCOL_COMMON2 && notIsContinuation) {
5430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    ++count2;
5431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (count2 > 0) {
5433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
5434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while (count2 > UCOL_TOP_COUNT2) {
5435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
5436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                count2 -= (uint32_t)UCOL_TOP_COUNT2;
5437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - (count2-1));
5439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
5440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while (count2 > UCOL_BOT_COUNT2) {
5441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
5442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                count2 -= (uint32_t)UCOL_BOT_COUNT2;
5443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));
5445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count2 = 0;
5447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *secondaries++ = secondary;
5449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(notIsContinuation) {
5453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tertiary ^= caseSwitch;
5454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(tertiary > 0) {
5457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* This is compression code. */
5458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* sequence size check is included in the if clause */
5459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (tertiary == tertiaryCommon && notIsContinuation) {
5460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    ++count3;
5461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
5463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        tertiary += tertiaryAddition;
5464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else if (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
5465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        tertiary -= tertiaryAddition;
5466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (count3 > 0) {
5468c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if ((tertiary > tertiaryCommon)) {
5469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while (count3 > coll->tertiaryTopCount) {
5470c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount);
5471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                count3 -= (uint32_t)coll->tertiaryTopCount;
5472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *tertiaries++ = (uint8_t)(tertiaryTop - (count3-1));
5474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
5475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while (count3 > coll->tertiaryBottomCount) {
5476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount);
5477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                count3 -= (uint32_t)coll->tertiaryBottomCount;
5478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));
5480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count3 = 0;
5482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *tertiaries++ = tertiary;
5484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(primaries > primarySafeEnd) { /* We have stepped over the primary buffer */
5488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(allocateSKBuffer == FALSE) { /* need to save our butts if we cannot reallocate */
548950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    IInit_collIterate(coll, (UChar *)source, len, &s, status);
549050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(U_FAILURE(*status)) {
549150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
549250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        finished = TRUE;
549350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
549450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
549527f654740f2a26ad62a5c155af9199af9e69b889claireho                    s.flags &= ~UCOL_ITER_NORM;
5496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);
5497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *status = U_BUFFER_OVERFLOW_ERROR;
5498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    finished = TRUE;
5499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
5500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* It's much nicer if we can actually reallocate */
550150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t sks = sortKeySize+(int32_t)((primaries - primStart)+(secondaries - secStart)+(tertiaries - terStart));
5502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sks, status);
5503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(U_SUCCESS(*status)) {
5504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *result = primStart;
5505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        primarySafeEnd = primStart + resultLength - 2;
5506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* We ran out of memory!? We can't recover. */
5508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        finished = TRUE;
5510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
5511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(finished) {
5516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
5518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            prevBuffSize = minBufferSize;
5519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            secStart = reallocateBuffer(&secondaries, secStart, second, &secSize, 2*secSize, status);
5520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            terStart = reallocateBuffer(&tertiaries, terStart, tert, &terSize, 2*terSize, status);
5521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            minBufferSize *= 2;
5522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(U_FAILURE(*status)) { // if we cannot reallocate buffers, we can at least give the sortkey size
5523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* We ran out of memory!? We can't recover. */
5524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
5526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(*status)) {
553150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sortKeySize += (uint32_t)(primaries - primStart);
5532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* we have done all the CE's, now let's put them together to form a key */
5533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (count2 > 0) {
5534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while (count2 > UCOL_BOT_COUNT2) {
5535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
5536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                count2 -= (uint32_t)UCOL_BOT_COUNT2;
5537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));
5539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
554050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint32_t secsize = (uint32_t)(secondaries-secStart);
5541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sortKeySize += secsize;
5542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(sortKeySize <= resultLength) {
5543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *(primaries++) = UCOL_LEVELTERMINATOR;
5544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_memcpy(primaries, secStart, secsize);
5545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            primaries += secsize;
5546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
5547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(allocateSKBuffer == TRUE) {
5548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
5549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(U_SUCCESS(*status)) {
5550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(primaries++) = UCOL_LEVELTERMINATOR;
5551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *result = primStart;
5552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uprv_memcpy(primaries, secStart, secsize);
5553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                else {
5555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* We ran out of memory!? We can't recover. */
5556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto cleanup;
5558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *status = U_BUFFER_OVERFLOW_ERROR;
5561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (count3 > 0) {
5565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) {
5566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (count3 >= coll->tertiaryTopCount) {
5567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount);
5568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    count3 -= (uint32_t)coll->tertiaryTopCount;
5569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *tertiaries++ = (uint8_t)(tertiaryTop - count3);
5571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (count3 > coll->tertiaryBottomCount) {
5573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount);
5574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    count3 -= (uint32_t)coll->tertiaryBottomCount;
5575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));
5577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
557950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint32_t tersize = (uint32_t)(tertiaries - terStart);
5580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sortKeySize += tersize;
5581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(sortKeySize <= resultLength) {
5582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *(primaries++) = UCOL_LEVELTERMINATOR;
5583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_memcpy(primaries, terStart, tersize);
5584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            primaries += tersize;
5585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
5586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(allocateSKBuffer == TRUE) {
5587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
5588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(U_SUCCESS(*status)) {
5589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *result = primStart;
5590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(primaries++) = UCOL_LEVELTERMINATOR;
5591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uprv_memcpy(primaries, terStart, tersize);
5592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                else {
5594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* We ran out of memory!? We can't recover. */
5595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
5596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto cleanup;
5597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
559927f654740f2a26ad62a5c155af9199af9e69b889claireho                *status = U_BUFFER_OVERFLOW_ERROR;
5600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *(primaries++) = '\0';
5604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(allocateSKBuffer == TRUE) {
5607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *result = (uint8_t*)uprv_malloc(sortKeySize);
5608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* test for NULL */
5609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (*result == NULL) {
5610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
5611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            goto cleanup;
5612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(*result, primStart, sortKeySize);
5614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(primStart != prim) {
5615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_free(primStart);
5616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querucleanup:
5620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (allocateSKBuffer == FALSE && resultLength > 0 && U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
5621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* NULL terminate for safety */
5622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        **result = 0;
5623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(terStart != tert) {
5625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(terStart);
5626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(secStart);
5627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* To avoid memory leak, free the offset buffer if necessary. */
5630b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucol_freeOffsetBuffer(&s);
5631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return sortKeySize;
5633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
5634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline
5636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) {
5637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool notIsContinuation = !isContinuation(CE);
5638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF);
563927f654740f2a26ad62a5c155af9199af9e69b889claireho    if((LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0)
564027f654740f2a26ad62a5c155af9199af9e69b889claireho               || (!notIsContinuation && *wasShifted)))
5641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || (*wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */
5642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
5643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // The stuff below should probably be in the sortkey code... maybe not...
5644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(primary1 != 0) { /* if we were shifted and we got an ignorable code point */
5645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* we should just completely ignore it */
5646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *wasShifted = TRUE;
5647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            //continue;
5648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //*wasShifted = TRUE;
5650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return TRUE;
5651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
5652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *wasShifted = FALSE;
5653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return FALSE;
5654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
5656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline
5657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *dest) {
5658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(level < maxLevel) {
5659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        dest[i++] = UCOL_LEVELTERMINATOR;
5660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
5661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        dest[i++] = 0;
5662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
5664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Level identifiers used during partial sort key generation.
 * The values UCOL_PSK_PRIMARY through UCOL_PSK_NULL fit in three bits.
 */
enum {
    UCOL_PSK_PRIMARY    = 0,
    UCOL_PSK_SECONDARY  = 1,
    UCOL_PSK_CASE       = 2,
    UCOL_PSK_TERTIARY   = 3,
    UCOL_PSK_QUATERNARY = 4,
    /** Extra level, not used - but we have three bits to blow. */
    UCOL_PSK_QUIN       = 5,
    UCOL_PSK_IDENTICAL  = 6,
    /** Level for the end of the sort key. Will just produce zeros. */
    UCOL_PSK_NULL       = 7,
    /** Number of level identifiers; not a level itself. */
    UCOL_PSK_LIMIT
};
5677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Layout of the collation processing state, which is stored in the
 * state[1] field. To extract a field, shift the state word right by its
 * *_SHIFT value and AND the result with its *_MASK value.
 */
enum {
    /** Level identifier: stores one of the UCOL_PSK_* level values (three bits). */
    UCOL_PSK_LEVEL_SHIFT = 0,
    UCOL_PSK_LEVEL_MASK = 0x7,

    /**
     * Number of bytes of a primary or quaternary already written. Can be
     * only 0 or 1, since we get up to two bytes from primary or quaternary.
     * This field is also used to denote that the French secondary level
     * is finished.
     */
    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3,
    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 0x1,

    /** Was the last value shifted? Can be 0 or 1 (Boolean). */
    UCOL_PSK_WAS_SHIFTED_SHIFT = 4,
    UCOL_PSK_WAS_SHIFTED_MASK = 0x1,

    /**
     * How many French bytes have already been written (two-bit field).
     * When we do French we need to reverse secondary values. However,
     * continuations need to stay the same. So if you had abc1c2c3de,
     * you need to have edc1c2c3ba.
     */
    UCOL_PSK_USED_FRENCH_SHIFT = 5,
    UCOL_PSK_USED_FRENCH_MASK = 0x3,

    /** Two-bit field for the bocsu byte count. */
    UCOL_PSK_BOCSU_BYTES_SHIFT = 7,
    UCOL_PSK_BOCSU_BYTES_MASK = 0x3,

    /** Count of consumed CEs; the mask keeps 19 bits. */
    UCOL_PSK_CONSUMED_CES_SHIFT = 9,
    UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF
};
5703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Macro calculating the number of expansion CEs available: the distance
// between the CEpos and toReturn members of the collIterate passed as s.
// The whole expansion is parenthesized so the macro behaves as a single
// operand inside larger expressions (e.g. after '!' or next to '*').
// Note that s is evaluated twice, so it must not have side effects.
#define uprv_numAvailableExpCEs(s) ((s).CEpos - (s).toReturn)
5706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** main sortkey part procedure. On the first call,
5709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  you should pass in a collator, an iterator, empty state
5710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  state[0] == state[1] == 0, a buffer to hold results
5711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  number of bytes you need and an error code pointer.
5712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  Make sure your buffer is big enough to hold the wanted
5713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  number of sortkey bytes. I don't check.
5714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  The only meaningful status you can get back is
5715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  U_BUFFER_OVERFLOW_ERROR, which basically means that you
5716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  have been dealt a raw deal and that you probably won't
5717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  be able to use partial sortkey generation for this
5718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  particular combination of string and collator. This
5719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  is highly unlikely, but you should still check the error code.
5720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  Any other status means that you're not in a sane situation
5721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  anymore. After the first call, preserve state values and
5722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  use them on subsequent calls to obtain more bytes of a sortkey.
5723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  Use until the number of bytes written is smaller than the requested
5724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  number of bytes. Generated sortkey is not compatible with the
5725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  one generated by ucol_getSortKey, as we don't do any compression.
5726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  However, levels are still terminated by a 1 (one) and the sortkey
5727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  is terminated by a 0 (zero). Identical level is the same as in the
5728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  regular sortkey - internal bocu-1 implementation is used.
5729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  For curious, although you cannot do much about this, here is
5730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  the structure of state words.
5731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  state[0] - iterator state. Depends on the iterator implementation,
5732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             but allows the iterator to continue where it stopped in
5733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             the last iteration.
5734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  state[1] - collation processing state. Here is the distribution
5735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             of the bits:
5736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   0, 1, 2 - level of the sortkey - primary, secondary, case, tertiary
5737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             quaternary, quin (we don't use this one), identical and
5738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             null (producing only zeroes - first one to terminate the
5739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             sortkey and subsequent to fill the buffer).
5740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   3       - byte count. Number of bytes written on the primary level.
5741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   4       - was shifted. Whether the previous iteration finished in the
5742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             shifted state.
5743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   5, 6    - French continuation bytes written. See the comment in the enum
5744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   7,8     - Bocsu bytes used. Number of bytes from a bocu sequence on
5745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             the identical level.
 *   9..31   - CEs consumed. Number of getCE or next32 operations performed
 *             since the last successful update of the iterator state.
 *             Only bits 9..27 are read back, since
 *             UCOL_PSK_CONSUMED_CES_MASK is 0x7FFFF (19 bits).
5748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
5749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
5750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_nextSortKeyPart(const UCollator *coll,
5751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     UCharIterator *iter,
5752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     uint32_t state[2],
5753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     uint8_t *dest, int32_t count,
5754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                     UErrorCode *status)
5755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
5756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* error checking */
5757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(status==NULL || U_FAILURE(*status)) {
5758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
5759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
5761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( coll==NULL || iter==NULL ||
5762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        state==NULL ||
5763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        count<0 || (count>0 && dest==NULL)
5764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
5765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status=U_ILLEGAL_ARGUMENT_ERROR;
5766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UTRACE_EXIT_STATUS(status);
5767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
5768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
5771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  coll, iter, state[0], state[1], dest, count);
5772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count==0) {
5774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* nothing to do */
5775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UTRACE_EXIT_VALUE(0);
5776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
5777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /** Setting up situation according to the state we got from the previous iteration */
5779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The state of the iterator from the previous invocation
5780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t iterState = state[0];
5781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Has the last iteration ended in the shifted state
5782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_SHIFTED_MASK)?TRUE:FALSE;
5783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // What is the current level of the sortkey?
5784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK;
5785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Have we written only one byte from a two byte primary in the previous iteration?
5786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Also on secondary level - have we finished with the French secondary?
5787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK;
5788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // number of bytes in the continuation buffer for French
5789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK;
5790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Number of bytes already written from a bocsu sequence. Since
    // the longest bocsu sequence is 4 long, this can be up to 3.
5792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK_BOCSU_BYTES_MASK;
5793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Number of elements that need to be consumed in this iteration because
5794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // the iterator returned UITER_NO_STATE at the end of the last iteration,
5795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // so we had to save the last valid state.
5796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED_CES_MASK;
5797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /** values that depend on the collator attributes */
5799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // strength of the collator.
5800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status);
    // maximal level of the partial sortkey. Need to take into account whether case level is done
5802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t maxLevel = 0;
5803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(strength < UCOL_TERTIARY) {
5804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
5805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = UCOL_PSK_CASE;
5806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
5807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = strength;
5808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
5810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strength == UCOL_TERTIARY) {
5811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = UCOL_PSK_TERTIARY;
5812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(strength == UCOL_QUATERNARY) {
5813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = UCOL_PSK_QUATERNARY;
5814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else { // identical
5815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = UCOL_IDENTICAL;
5816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // value for the quaternary level if Hiragana is encountered. Used for JIS X 4061 collation
5819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_HIRAGANA_QUAD =
5820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF;
5821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Boundary value that decides whether a CE is shifted or not
5822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0;
5823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Are we doing French collation?
5824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON);
5825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /** initializing the collation state */
5827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool notIsContinuation = FALSE;
5828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t CE = UCOL_NO_MORE_CES;
5829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collIterate s;
583150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, NULL, -1, &s, status);
583250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
583350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTRACE_EXIT_STATUS(*status);
583450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
583550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
5836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.iterator = iter;
5837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.flags |= UCOL_USE_ITERATOR;
5838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This variable tells us whether we have produced some other levels in this iteration
5839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // before we moved to the identical level. In that case, we need to switch the
5840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // type of the iterator.
5841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool doingIdenticalFromStart = FALSE;
5842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Normalizing iterator
5843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The division for the array length may truncate the array size to
5844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
5845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // for all platforms anyway.
5846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
5847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UNormIterator *normIter = NULL;
5848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If the normalization is turned on for the collator and we are below identical level
5849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // we will use a FCD normalizing iterator
5850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && level < UCOL_PSK_IDENTICAL) {
5851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
5852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status);
5853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.flags &= ~UCOL_ITER_NORM;
5854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(U_FAILURE(*status)) {
5855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UTRACE_EXIT_STATUS(*status);
5856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return 0;
5857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(level == UCOL_PSK_IDENTICAL) {
5859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // for identical level, we need a NFD iterator. We need to instantiate it here, since we
5860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // will be updating the state - and this cannot be done on an ordinary iterator.
5861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
5862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
5863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.flags &= ~UCOL_ITER_NORM;
5864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(U_FAILURE(*status)) {
5865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UTRACE_EXIT_STATUS(*status);
5866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return 0;
5867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        doingIdenticalFromStart = TRUE;
5869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This is the tentative new state of the iterator. The problem
5872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // is that the iterator might return an undefined state, in
5873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // which case we should save the last valid state and increase
5874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // the iterator skip value.
5875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t newState = 0;
5876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First, we set the iterator to the last valid position
5878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // from the last iteration. This was saved in state[0].
5879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(iterState == 0) {
5880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* initial state */
5881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone) {
5882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            s.iterator->move(s.iterator, 0, UITER_LIMIT);
5883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
5884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            s.iterator->move(s.iterator, 0, UITER_START);
5885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
5887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* reset to previous state */
5888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.iterator->setState(s.iterator, iterState, status);
5889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(U_FAILURE(*status)) {
5890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UTRACE_EXIT_STATUS(*status);
5891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return 0;
5892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This variable tells us whether we can attempt to update the state
5898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // of iterator. Situations where we don't want to update iterator state
5899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // are the existence of expansion CEs that are not yet processed, and
5900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // finishing the case level without enough space in the buffer to insert
5901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // a level terminator.
5902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool canUpdateState = TRUE;
5903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Consume all the CEs that were consumed at the end of the previous
5905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // iteration without updating the iterator state. On identical level,
5906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // consume the code points.
5907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t counter = cces;
5908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(level < UCOL_PSK_IDENTICAL) {
5909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(counter-->0) {
5910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // If we're doing French and we are on the secondary level,
5911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // we go backwards.
5912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(level == UCOL_PSK_SECONDARY && doingFrench) {
5913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetPrevCE(coll, &s, status);
5914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &s, status);
5916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(CE==UCOL_NO_MORE_CES) {
5918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* should not happen */
5919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *status=U_INTERNAL_PROGRAM_ERROR;
5920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UTRACE_EXIT_STATUS(*status);
5921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return 0;
5922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(uprv_numAvailableExpCEs(s)) {
5924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                canUpdateState = FALSE;
5925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
5928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(counter-->0) {
5929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uiter_next32(s.iterator);
5930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // French secondary needs to know whether the iterator state of zero came from previous level OR
5934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // from a new invocation...
5935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool wasDoingPrimary = FALSE;
5936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // destination buffer byte counter. When this guy
5937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // gets to count, we're done with the iteration
5938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0;
5939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // used to count the zero bytes written after we
5940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // have finished with the sort key
5941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t j = 0;
5942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Hm.... I think we're ready to plunge in. Basic story is as following:
5945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // we have a fall through case based on level. This is used for initial
5946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // positioning on iteration start. Every level processor contains a
5947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // for(;;) which will be broken when we exhaust all the CEs. Other
5948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // way to exit is a goto saveState, which happens when we have filled
5949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // out our buffer.
5950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(level) {
5951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_PRIMARY:
5952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        wasDoingPrimary = TRUE;
5953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
5954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(i==count) {
5955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto saveState;
5956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We should save the state only if we
5958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // are sure that we are done with the
5959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // previous iterator state
5960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(canUpdateState && byteCountOrFrenchDone == 0) {
5961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                newState = s.iterator->getState(s.iterator);
5962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(newState != UITER_NO_STATE) {
5963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    iterState = newState;
5964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
5965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            CE = ucol_IGetNextCE(coll, &s, status);
5968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            cces++;
5969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(CE==UCOL_NO_MORE_CES) {
5970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Add the level separator
5971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                terminatePSKLevel(level, maxLevel, i, dest);
5972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                byteCountOrFrenchDone=0;
5973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Restart the iteration and move to the
5974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // second level
5975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                s.iterator->move(s.iterator, 0, UITER_START);
5976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                cces = 0;
5977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                level = UCOL_PSK_SECONDARY;
5978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
5979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
598027f654740f2a26ad62a5c155af9199af9e69b889claireho            if(!isContinuation(CE)){
598127f654740f2a26ad62a5c155af9199af9e69b889claireho                if(coll->leadBytePermutationTable != NULL){
598227f654740f2a26ad62a5c155af9199af9e69b889claireho                    CE = (coll->leadBytePermutationTable[CE>>24] << 24) | (CE & 0x00FFFFFF);
598327f654740f2a26ad62a5c155af9199af9e69b889claireho                }
598427f654740f2a26ad62a5c155af9199af9e69b889claireho            }
5985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!isShiftedCE(CE, LVT, &wasShifted)) {
5986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */
5987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE != 0) {
5988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(byteCountOrFrenchDone == 0) {
5989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // get the second byte of primary
5990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        dest[i++]=(uint8_t)(CE >> 8);
5991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        byteCountOrFrenchDone = 0;
5993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((CE &=0xff)!=0) {
5995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(i==count) {
5996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* overflow */
5997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            byteCountOrFrenchDone = 1;
5998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            cces--;
5999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto saveState;
6000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        dest[i++]=(uint8_t)CE;
6002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(uprv_numAvailableExpCEs(s)) {
6006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                canUpdateState = FALSE;
6007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
6008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                canUpdateState = TRUE;
6009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* fall through to next level */
6012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    case UCOL_PSK_SECONDARY:
6013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(strength >= UCOL_SECONDARY) {
6014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(!doingFrench) {
6015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for(;;) {
6016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(i == count) {
6017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto saveState;
6018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // We should save the state only if we
6020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // are sure that we are done with the
6021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // previous iterator state
6022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(canUpdateState) {
6023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        newState = s.iterator->getState(s.iterator);
6024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(newState != UITER_NO_STATE) {
6025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            iterState = newState;
6026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            cces = 0;
6027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = ucol_IGetNextCE(coll, &s, status);
6030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces++;
6031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE==UCOL_NO_MORE_CES) {
6032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Add the level separator
6033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        terminatePSKLevel(level, maxLevel, i, dest);
6034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        byteCountOrFrenchDone = 0;
6035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Restart the iteration and move to the
6036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // next level (the case level)
6037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        s.iterator->move(s.iterator, 0, UITER_START);
6038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        cces = 0;
6039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        level = UCOL_PSK_CASE;
6040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
6041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isShiftedCE(CE, LVT, &wasShifted)) {
6043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE >>= 8; /* get secondary */
6044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(CE != 0) {
6045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++]=(uint8_t)CE;
6046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(uprv_numAvailableExpCEs(s)) {
6049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = FALSE;
6050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
6051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = TRUE;
6052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
6054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else { // French secondary processing
6055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint8_t frenchBuff[UCOL_MAX_BUFFER];
6056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                int32_t frenchIndex = 0;
6057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Here we are going backwards.
6058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // If the iterator is at the beginning, it should be
6059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // moved to end.
6060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(wasDoingPrimary) {
6061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    s.iterator->move(s.iterator, 0, UITER_LIMIT);
6062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
6063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for(;;) {
6065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(i == count) {
6066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto saveState;
6067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(canUpdateState) {
6069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        newState = s.iterator->getState(s.iterator);
6070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(newState != UITER_NO_STATE) {
6071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            iterState = newState;
6072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            cces = 0;
6073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = ucol_IGetPrevCE(coll, &s, status);
6076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces++;
6077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE==UCOL_NO_MORE_CES) {
6078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Add the level separator
6079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        terminatePSKLevel(level, maxLevel, i, dest);
6080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        byteCountOrFrenchDone = 0;
6081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Restart the iteration an move to the next level
6082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        s.iterator->move(s.iterator, 0, UITER_START);
6083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        level = UCOL_PSK_CASE;
6084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
6085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(isContinuation(CE)) { // if it's a continuation, we want to save it and
6087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // reverse when we get a first non-continuation CE.
6088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE >>= 8;
6089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        frenchBuff[frenchIndex++] = (uint8_t)CE;
6090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else if(!isShiftedCE(CE, LVT, &wasShifted)) {
6091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE >>= 8; /* get secondary */
6092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(!frenchIndex) {
6093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(CE != 0) {
6094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                dest[i++]=(uint8_t)CE;
6095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
6097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            frenchBuff[frenchIndex++] = (uint8_t)CE;
6098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            frenchIndex -= usedFrench;
6099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            usedFrench = 0;
6100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while(i < count && frenchIndex) {
6101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                dest[i++] = frenchBuff[--frenchIndex];
6102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                usedFrench++;
6103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(uprv_numAvailableExpCEs(s)) {
6107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = FALSE;
6108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
6109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = TRUE;
6110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
6112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_CASE;
6115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
6117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_CASE:
6118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
6119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t caseShift = UCOL_CASE_SHIFT_START;
6120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint8_t caseByte = UCOL_CASE_BYTE_START;
6121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint8_t caseBits = 0;
6122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
612450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(caseShift <= UCOL_CASE_SHIFT_START);
6125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(i == count) {
6126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto saveState;
6127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // We should save the state only if we
6129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // are sure that we are done with the
6130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // previous iterator state
6131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(canUpdateState) {
6132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    newState = s.iterator->getState(s.iterator);
6133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(newState != UITER_NO_STATE) {
6134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        iterState = newState;
6135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        cces = 0;
6136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &s, status);
6139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                cces++;
6140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE==UCOL_NO_MORE_CES) {
6141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // On the case level we might have an unfinished
6142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // case byte. Add one if it's started.
6143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(caseShift != UCOL_CASE_SHIFT_START) {
6144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        dest[i++] = caseByte;
6145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
6147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // We have finished processing CEs on this level.
6148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // However, we don't know if we have enough space
6149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // to add a case level terminator.
6150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(i < count) {
6151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Add the level separator
6152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        terminatePSKLevel(level, maxLevel, i, dest);
6153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Restart the iteration and move to the
6154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // next level
6155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        s.iterator->move(s.iterator, 0, UITER_START);
6156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        level = UCOL_PSK_TERTIARY;
6157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
6158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = FALSE;
6159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
6161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isShiftedCE(CE, LVT, &wasShifted)) {
6164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 || strength > UCOL_PRIMARY)) {
6165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // do the case level if we need to do it. We don't want to calculate
6166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // case level for primary ignorables if we have only primary strength and case level
6167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // otherwise we would break well formedness of CEs
6168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
6169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        caseBits = (uint8_t)(CE & 0xC0);
6170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // this copies the case level logic from the
6171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // sort key generation code
6172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(CE != 0) {
617350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (caseShift == 0) {
617450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                dest[i++] = caseByte;
617550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                caseShift = UCOL_CASE_SHIFT_START;
617650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                caseByte = UCOL_CASE_BYTE_START;
617750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
6178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(coll->caseFirst == UCOL_UPPER_FIRST) {
6179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if((caseBits & 0xC0) == 0) {
6180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= 1 << (--caseShift);
6181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
6182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= 0 << (--caseShift);
6183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    /* second bit */
6184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(caseShift == 0) {
6185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        dest[i++] = caseByte;
6186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        caseShift = UCOL_CASE_SHIFT_START;
6187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        caseByte = UCOL_CASE_BYTE_START;
6188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
6189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= ((caseBits>>6)&1) << (--caseShift);
6190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
6191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
6192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if((caseBits & 0xC0) == 0) {
6193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= 0 << (--caseShift);
6194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
6195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= 1 << (--caseShift);
6196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    /* second bit */
6197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(caseShift == 0) {
6198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        dest[i++] = caseByte;
6199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        caseShift = UCOL_CASE_SHIFT_START;
6200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        caseByte = UCOL_CASE_BYTE_START;
6201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
6202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= ((caseBits>>7)&1) << (--caseShift);
6203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
6204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Not sure this is correct for the case level - revisit
6210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(uprv_numAvailableExpCEs(s)) {
6211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = FALSE;
6212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
6213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = TRUE;
6214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
6215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_TERTIARY;
6218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
6220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_TERTIARY:
6221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(strength >= UCOL_TERTIARY) {
6222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
6223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(i == count) {
6224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto saveState;
6225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // We should save the state only if we
6227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // are sure that we are done with the
6228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // previous iterator state
6229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(canUpdateState) {
6230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    newState = s.iterator->getState(s.iterator);
6231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(newState != UITER_NO_STATE) {
6232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        iterState = newState;
6233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        cces = 0;
6234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &s, status);
6237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                cces++;
6238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE==UCOL_NO_MORE_CES) {
6239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Add the level separator
6240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    terminatePSKLevel(level, maxLevel, i, dest);
6241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    byteCountOrFrenchDone = 0;
6242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Restart the iteration and move to the
6243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // next (quaternary) level
6244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    s.iterator->move(s.iterator, 0, UITER_START);
6245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
6246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    level = UCOL_PSK_QUATERNARY;
6247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
6248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isShiftedCE(CE, LVT, &wasShifted)) {
6250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    notIsContinuation = !isContinuation(CE);
6251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(notIsContinuation) {
6253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
6254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE ^= coll->caseSwitch;
6255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE &= coll->tertiaryMask;
6256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
6257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
6258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE != 0) {
6261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        dest[i++]=(uint8_t)CE;
6262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(uprv_numAvailableExpCEs(s)) {
6265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = FALSE;
6266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
6267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = TRUE;
6268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // if we're not doing tertiary
6272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // skip to the end
6273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_NULL;
6274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
6276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_QUATERNARY:
6277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(strength >= UCOL_QUATERNARY) {
6278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
6279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(i == count) {
6280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto saveState;
6281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // We should save the state only if we
6283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // are sure that we are done with the
6284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // previous iterator state
6285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(canUpdateState) {
6286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    newState = s.iterator->getState(s.iterator);
6287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(newState != UITER_NO_STATE) {
6288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        iterState = newState;
6289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        cces = 0;
6290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &s, status);
6293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                cces++;
6294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE==UCOL_NO_MORE_CES) {
6295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Add the level separator
6296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    terminatePSKLevel(level, maxLevel, i, dest);
6297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //dest[i++] = UCOL_LEVELTERMINATOR;
6298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    byteCountOrFrenchDone = 0;
6299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Restart the iteration and move to the
6300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // next level
6301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    s.iterator->move(s.iterator, 0, UITER_START);
6302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
6303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    level = UCOL_PSK_QUIN;
6304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
6305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE==0)
6307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
6308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(isShiftedCE(CE, LVT, &wasShifted)) {
6309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE >>= 16; /* get primary */
6310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE != 0) {
6311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(byteCountOrFrenchDone == 0) {
6312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++]=(uint8_t)(CE >> 8);
6313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
6314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            byteCountOrFrenchDone = 0;
6315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if((CE &=0xff)!=0) {
6317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(i==count) {
6318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                /* overflow */
6319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                byteCountOrFrenchDone = 1;
6320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goto saveState;
6321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++]=(uint8_t)CE;
6323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
6326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    notIsContinuation = !isContinuation(CE);
6327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(notIsContinuation) {
6328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
6329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++] = UCOL_HIRAGANA_QUAD;
6330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
6331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++] = 0xFF;
6332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(uprv_numAvailableExpCEs(s)) {
6336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = FALSE;
6337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
6338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = TRUE;
6339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // if we're not doing quaternary
6343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // skip to the end
6344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_NULL;
6345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
6347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_QUIN:
6348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        level = UCOL_PSK_IDENTICAL;
6349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
6350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_IDENTICAL:
6351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(strength >= UCOL_IDENTICAL) {
6352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UChar32 first, second;
6353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            int32_t bocsuBytesWritten = 0;
6354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // We always need to do identical on
6355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // the NFD form of the string.
6356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(normIter == NULL) {
6357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // we arrived from the level below and
6358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // normalization was not turned on.
6359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // therefore, we need to make a fresh NFD iterator
6360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
6361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
6362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else if(!doingIdenticalFromStart) {
6363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // there is an iterator, but we did some other levels.
6364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // therefore, we have a FCD iterator - need to make
6365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // a NFD one.
6366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // normIter being at the beginning does not guarantee
6367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // that the underlying iterator is at the beginning
6368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                iter->move(iter, 0, UITER_START);
6369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
6370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // At this point we have a NFD iterator that is positioned
6372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // in the right place
6373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(U_FAILURE(*status)) {
6374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UTRACE_EXIT_STATUS(*status);
6375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return 0;
6376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            first = uiter_previous32(s.iterator);
6378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // maybe we're at the start of the string
6379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(first == U_SENTINEL) {
6380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                first = 0;
6381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
6382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uiter_next32(s.iterator);
6383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            j = 0;
6386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
6387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(i == count) {
6388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(j+1 < bocsuBytesWritten) {
6389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        bocsuBytesUsed = j+1;
6390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto saveState;
6392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // On identical level, we will always save
6395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // the state if we reach this point, since
6396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // we don't depend on getNextCE for content
6397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // all the content is in our buffer and we
6398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // already either stored the full buffer OR
6399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // otherwise we won't arrive here.
6400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                newState = s.iterator->getState(s.iterator);
6401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(newState != UITER_NO_STATE) {
6402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    iterState = newState;
6403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
6404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint8_t buff[4];
6407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                second = uiter_next32(s.iterator);
6408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                cces++;
6409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // end condition for identical level
6411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(second == U_SENTINEL) {
6412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    terminatePSKLevel(level, maxLevel, i, dest);
6413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    level = UCOL_PSK_NULL;
6414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
6415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, second, buff);
6417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                first = second;
6418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                j = 0;
6420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(bocsuBytesUsed != 0) {
6421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(bocsuBytesUsed-->0) {
6422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        j++;
6423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(i < count && j < bocsuBytesWritten) {
6427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    dest[i++] = buff[j++];
6428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_NULL;
6433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
6435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_NULL:
6436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        j = i;
6437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(j<count) {
6438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            dest[j++]=0;
6439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        break;
6441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
6442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_INTERNAL_PROGRAM_ERROR;
6443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_STATUS(*status);
6444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusaveState:
6448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Now we need to return stuff. First we want to see whether we have
6449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // done everything for the current state of iterator.
6450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(byteCountOrFrenchDone
6451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || canUpdateState == FALSE
6452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE)
6453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
6454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Any of above mean that the previous transaction
6455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // wasn't finished and that we should store the
6456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // previous iterator state.
6457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        state[0] = iterState;
6458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
6459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // The transaction is complete. We will continue in the next iteration.
6460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        state[0] = s.iterator->getState(s.iterator);
6461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cces = 0;
6462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Store the number of bocsu bytes written.
6464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) {
6465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_INDEX_OUTOFBOUNDS_ERROR;
6466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BYTES_SHIFT;
6468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Next we put in the level of comparison
6470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT);
6471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If we are doing French, we need to store whether we have just finished the French level
6473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(level == UCOL_PSK_SECONDARY && doingFrench) {
6474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        state[1] |= (((state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
6475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
6476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        state[1] |= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
6477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Was the latest CE shifted
6480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(wasShifted) {
6481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        state[1] |= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT;
6482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Check for cces overflow
6484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) {
6485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_INDEX_OUTOFBOUNDS_ERROR;
6486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Store cces
6488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    state[1] |= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SHIFT);
6489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Check for French overflow
6491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) {
6492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_INDEX_OUTOFBOUNDS_ERROR;
6493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Store number of bytes written in the French secondary continuation sequence
6495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    state[1] |= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENCH_SHIFT);
6496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If we have used normalizing iterator, get rid of it
6499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(normIter != NULL) {
6500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(normIter);
6501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* To avoid memory leak, free the offset buffer if necessary. */
6504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucol_freeOffsetBuffer(&s);
6505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Return number of meaningful sortkey bytes.
6507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
6508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  dest,i, state[0], state[1]);
6509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_EXIT_VALUE(i);
6510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return i;
6511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
6514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Produce a bound for a given sortkey and a number of levels.
6515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
6516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
6517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getBound(const uint8_t       *source,
6518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             sourceLength,
6519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UColBoundMode       boundType,
6520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t            noOfLevels,
6521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint8_t             *result,
6522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             resultLength,
6523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode          *status)
6524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
6525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // consistency checks
6526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(status == NULL || U_FAILURE(*status)) {
6527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(source == NULL) {
6530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
6531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t sourceIndex = 0;
6535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Scan the string until we skip enough of the key OR reach the end of the key
6536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    do {
6537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sourceIndex++;
6538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
6539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            noOfLevels--;
6540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } while (noOfLevels > 0
6542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
6543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
6545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        && noOfLevels > 0) {
6546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_SORT_KEY_TOO_SHORT_WARNING;
6547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // READ ME: this code assumes that the values for boundType
6551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // enum will not changes. They are set so that the enum value
6552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // corresponds to the number of extra bytes each bound type
6553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // needs.
6554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(result != NULL && resultLength >= sourceIndex+boundType) {
6555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(result, source, sourceIndex);
6556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        switch(boundType) {
6557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Lower bound just gets terminated. No extra bytes
6558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case UCOL_BOUND_LOWER: // = 0
6559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
6560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Upper bound needs one extra byte
6561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case UCOL_BOUND_UPPER: // = 1
6562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result[sourceIndex++] = 2;
6563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
6564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Upper long bound needs two extra bytes
6565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case UCOL_BOUND_UPPER_LONG: // = 2
6566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result[sourceIndex++] = 0xFF;
6567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result[sourceIndex++] = 0xFF;
6568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
6569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        default:
6570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR;
6571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return 0;
6572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result[sourceIndex++] = 0;
6574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return sourceIndex;
6576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
6577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return sourceIndex+boundType+1;
6578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
6582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the functions that deal with the properties of a collator  */
6583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there are new APIs and some compatibility APIs                           */
6584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
6585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void
6587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
6588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    int32_t *primShift, int32_t *secShift, int32_t *terShift)
6589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
6590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
6591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool reverseSecondary = FALSE;
659227f654740f2a26ad62a5c155af9199af9e69b889claireho    UBool continuation = isContinuation(CE);
659327f654740f2a26ad62a5c155af9199af9e69b889claireho    if(!continuation) {
6594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tertiary = (uint8_t)((CE & coll->tertiaryMask));
6595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tertiary ^= coll->caseSwitch;
6596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        reverseSecondary = TRUE;
6597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
6598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tertiary = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
6599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tertiary &= UCOL_REMOVE_CASE;
6600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        reverseSecondary = FALSE;
6601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    secondary = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK);
6604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    primary2 = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK);
6605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    primary1 = (uint8_t)(CE >> 8);
6606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(primary1 != 0) {
660827f654740f2a26ad62a5c155af9199af9e69b889claireho        if (coll->leadBytePermutationTable != NULL && !continuation) {
660927f654740f2a26ad62a5c155af9199af9e69b889claireho            primary1 = coll->leadBytePermutationTable[primary1];
661027f654740f2a26ad62a5c155af9199af9e69b889claireho        }
661127f654740f2a26ad62a5c155af9199af9e69b889claireho
6612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneCEs[ch] |= (primary1 << *primShift);
6613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *primShift -= 8;
6614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(primary2 != 0) {
6616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(*primShift < 0) {
6617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
6618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
6619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
6620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return;
6621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneCEs[ch] |= (primary2 << *primShift);
6623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *primShift -= 8;
6624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(secondary != 0) {
6626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse secondary
6627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space for secondary
6628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << 24);
6629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else { // normal case
6630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << *secShift);
6631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *secShift -= 8;
6633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(tertiary != 0) {
6635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneCEs[2*coll->latinOneTableLen+ch] |= (tertiary << *terShift);
6636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *terShift -= 8;
6637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool
6641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_resizeLatinOneTable(UCollator *coll, int32_t size, UErrorCode *status) {
6642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t *newTable = (uint32_t *)uprv_malloc(size*sizeof(uint32_t)*3);
6643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(newTable == NULL) {
6644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *status = U_MEMORY_ALLOCATION_ERROR;
6645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      coll->latinOneFailed = TRUE;
6646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return FALSE;
6647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sizeToCopy = ((size<coll->latinOneTableLen)?size:coll->latinOneTableLen)*sizeof(uint32_t);
6649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memset(newTable, 0, size*sizeof(uint32_t)*3);
6650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(newTable, coll->latinOneCEs, sizeToCopy);
6651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(newTable+size, coll->latinOneCEs+coll->latinOneTableLen, sizeToCopy);
6652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(newTable+2*size, coll->latinOneCEs+2*coll->latinOneTableLen, sizeToCopy);
6653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    coll->latinOneTableLen = size;
6654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_free(coll->latinOneCEs);
6655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    coll->latinOneCEs = newTable;
6656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
6657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool
6660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
6661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool result = TRUE;
6662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(coll->latinOneCEs == NULL) {
6663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*UCOL_LATINONETABLELEN*3);
6664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->latinOneCEs == NULL) {
6665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
6666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return FALSE;
6667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneTableLen = UCOL_LATINONETABLELEN;
6669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar ch = 0;
6671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UCollationElements *it = ucol_openElements(coll, &ch, 1, status);
6672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Check for null pointer
6673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(*status)) {
6674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return FALSE;
6675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)*coll->latinOneTableLen*3);
6677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t primShift = 24, secShift = 24, terShift = 24;
6679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t CE = 0;
6680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1;
6681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // TODO: make safe if you get more than you wanted...
6683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) {
6684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        primShift = 24; secShift = 24; terShift = 24;
6685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(ch < 0x100) {
6686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE = coll->latinOneMapping[ch];
6687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
6689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(CE == UCOL_NOT_FOUND && coll->UCA) {
6690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
6691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(CE < UCOL_NOT_FOUND) {
6694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
6695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            switch (getCETag(CE)) {
6697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            case EXPANSION_TAG:
6698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            case DIGIT_TAG:
6699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                ucol_setText(it, &ch, 1, status);
6700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) {
6701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(primShift < 0 || secShift < 0 || terShift < 0) {
6702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
6703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
6704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
6705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
6706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
6707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
6708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
6710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            case CONTRACTION_TAG:
6711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // here is the trick
6712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // F2 is contraction. We do something very similar to contractions
6713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // but have two indices, one in the real contraction table and the
6714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // other to where we stuffed things. This hopes that we don't have
6715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // many contractions (this should work for latin-1 tables).
6716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                {
6717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((CE & 0x00FFF000) != 0) {
6718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_UNSUPPORTED_ERROR;
6719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto cleanup_after_failure;
6720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
6721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
6723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
6725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneCEs[ch] = CE;
6727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneCEs[coll->latinOneTableLen+ch] = CE;
6728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE;
6729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // We're going to jump into contraction table, pick the elements
6731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // and use them
6732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    do {
6733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
6734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (UCharOffset - coll->contractionIndex));
6735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) {
6736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uint32_t size;
6737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uint32_t i;    /* general counter */
6738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uint32_t *CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
6739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            size = getExpansionCount(CE);
6740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //CE = *CEOffset++;
6741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
6742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                for(i = 0; i<size; i++) {
6743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(primShift < 0 || secShift < 0 || terShift < 0) {
6744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        break;
6748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
6749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
6750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
6751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else { /* else, we do */
6752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                while(*CEOffset != 0) {
6753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(primShift < 0 || secShift < 0 || terShift < 0) {
6754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        break;
6758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
6759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
6760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
6761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            contractionOffset++;
6763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if(CE < UCOL_NOT_FOUND) {
6764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            ucol_addLatinOneEntry(coll, (UChar)contractionOffset++, CE, &primShift, &secShift, &terShift);
6765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
6766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            contractionOffset++;
6770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCharOffset++;
6772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        primShift = 24; secShift = 24; terShift = 24;
6773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate
6774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) {
6775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goto cleanup_after_failure;
6776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } while(*UCharOffset != 0xFFFF);
6779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
6780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;;
6781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            case SPEC_PROC_TAG:
6782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                {
6783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // 0xB7 is a precontext character defined in UCA5.1, a special
6784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // handle is implemeted in order to save LatinOne table for
6785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // most locales.
6786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (ch==0xb7) {
6787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
6788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else {
6790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto cleanup_after_failure;
6791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
6793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
6794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            default:
6795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto cleanup_after_failure;
6796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // compact table
6800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(contractionOffset < coll->latinOneTableLen) {
6801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) {
6802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            goto cleanup_after_failure;
6803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ucol_closeElements(it);
6806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return result;
6807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querucleanup_after_failure:
6809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // status should already be set before arriving here.
6810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    coll->latinOneFailed = TRUE;
6811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ucol_closeElements(it);
6812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return FALSE;
6813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid ucol_updateInternalState(UCollator *coll, UErrorCode *status) {
6816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_SUCCESS(*status)) {
6817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->caseFirst == UCOL_UPPER_FIRST) {
6818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->caseSwitch = UCOL_CASE_SWITCH;
6819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->caseSwitch = UCOL_NO_CASE_SWITCH;
6821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->caseLevel == UCOL_ON || coll->caseFirst == UCOL_OFF) {
6824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryMask = UCOL_REMOVE_CASE;
6825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
6826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF; /* Should be 0x80 */
6827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF;
6828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryBottom = UCOL_COMMON_BOT3;
6829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryMask = UCOL_KEEP_CASE;
6831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON;
6832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(coll->caseFirst == UCOL_UPPER_FIRST) {
6833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST;
6834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER;
6835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER;
6836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
6837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
6838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER;
6839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER;
6840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* Set the compression values */
6844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - UCOL_COMMON_BOT3-1);
6845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*tertiaryTotal); /* we multilply double with int, but need only int */
6846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopCount);
6847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY
6849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == UCOL_NON_IGNORABLE)
6850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
6851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->sortKeyGen = ucol_calcSortKeySimpleTertiary;
6852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->sortKeyGen = ucol_calcSortKey;
6854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && coll->numericCollation == UCOL_OFF
6856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed)
6857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
6858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) {
6859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it
6860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //fprintf(stderr, "F");
6861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneUse = TRUE;
6862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
6863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneUse = FALSE;
6864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(*status == U_UNSUPPORTED_ERROR) {
6866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *status = U_ZERO_ERROR;
6867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else { // latin1Table exists and it doesn't need to be regenerated, just use it
6869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->latinOneUse = TRUE;
6870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneUse = FALSE;
6873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t  U_EXPORT2
6878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) {
6879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(len == -1) {
6883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        len = u_strlen(varTop);
6884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(len == 0) {
6886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
6887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collIterate s;
689150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, varTop, len, &s, status);
689250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
689350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
689450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
6895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t CE = ucol_IGetNextCE(coll, &s, status);
6897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* here we check if we have consumed all characters */
6899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* you can put in either one character or a contraction */
6900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* you shouldn't put more... */
6901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(s.pos != s.endp || CE == UCOL_NO_MORE_CES) {
6902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_CE_NOT_FOUND_ERROR;
6903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t nextCE = ucol_IGetNextCE(coll, &s, status);
6907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) {
6909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_PRIMARY_TOO_LONG_ERROR;
6910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) {
6913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->variableTopValueisDefault = FALSE;
6914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
6915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* To avoid memory leak, free the offset buffer if necessary. */
6918b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucol_freeOffsetBuffer(&s);
6919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return CE & UCOL_PRIMARYMASK;
6921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
6924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return coll->variableTopValue<<16;
6928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void  U_EXPORT2
6931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
6932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
6934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) {
6937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->variableTopValueisDefault = FALSE;
6938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;
6939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Attribute setter API */
6942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void  U_EXPORT2
6943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
6944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return;
6946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UColAttributeValue oldFrench = coll->frenchCollation;
6948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UColAttributeValue oldCaseFirst = coll->caseFirst;
6949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(attr) {
6950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_NUMERIC_COLLATION: /* sort substrings of digits as numbers */
6951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(value == UCOL_ON) {
6952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollation = UCOL_ON;
6953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollationisDefault = FALSE;
6954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollation = UCOL_OFF;
6956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollationisDefault = FALSE;
6957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollationisDefault = TRUE;
6959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollation = (UColAttributeValue)coll->options->numericCollation;
6960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR;
6962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        break;
6964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_HIRAGANA_QUATERNARY_MODE: /* special quaternary values for Hiragana */
6965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(value == UCOL_ON) {
6966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQ = UCOL_ON;
6967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQisDefault = FALSE;
6968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQ = UCOL_OFF;
6970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQisDefault = FALSE;
6971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQisDefault = TRUE;
6973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQ = (UColAttributeValue)coll->options->hiraganaQ;
6974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR;
6976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        break;
6978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
6979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_ON) {
6980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollation = UCOL_ON;
6981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollationisDefault = FALSE;
6982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollation = UCOL_OFF;
6984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollationisDefault = FALSE;
6985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollationisDefault = TRUE;
6987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollation = (UColAttributeValue)coll->options->frenchCollation;
6988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
6990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
6993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_SHIFTED) {
6994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandling = UCOL_SHIFTED;
6995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandlingisDefault = FALSE;
6996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_NON_IGNORABLE) {
6997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandling = UCOL_NON_IGNORABLE;
6998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandlingisDefault = FALSE;
6999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
7000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandlingisDefault = TRUE;
7001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandling = (UColAttributeValue)coll->options->alternateHandling ;
7002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
7003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
7004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
7006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
7007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_LOWER_FIRST) {
7008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirst = UCOL_LOWER_FIRST;
7009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirstisDefault = FALSE;
7010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_UPPER_FIRST) {
7011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirst = UCOL_UPPER_FIRST;
7012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirstisDefault = FALSE;
7013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
7014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->caseFirst = UCOL_OFF;
7015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->caseFirstisDefault = FALSE;
7016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
7017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirst = (UColAttributeValue)coll->options->caseFirst;
7018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirstisDefault = TRUE;
7019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
7020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
7021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
7023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_CASE_LEVEL: /* do we have an extra case level */
7024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_ON) {
7025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevel = UCOL_ON;
7026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevelisDefault = FALSE;
7027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
7028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevel = UCOL_OFF;
7029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevelisDefault = FALSE;
7030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
7031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevel = (UColAttributeValue)coll->options->caseLevel;
7032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevelisDefault = TRUE;
7033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
7034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
7035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
7037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
7038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_ON) {
7039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationMode = UCOL_ON;
7040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationModeisDefault = FALSE;
704127f654740f2a26ad62a5c155af9199af9e69b889claireho            initializeFCD(status);
7042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
7043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationMode = UCOL_OFF;
7044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationModeisDefault = FALSE;
7045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
7046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationModeisDefault = TRUE;
7047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationMode = (UColAttributeValue)coll->options->normalizationMode;
704827f654740f2a26ad62a5c155af9199af9e69b889claireho            if(coll->normalizationMode == UCOL_ON) {
704927f654740f2a26ad62a5c155af9199af9e69b889claireho                initializeFCD(status);
705027f654740f2a26ad62a5c155af9199af9e69b889claireho            }
7051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
7052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
7053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
7055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_STRENGTH:         /* attribute for strength */
7056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (value == UCOL_DEFAULT) {
7057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->strengthisDefault = TRUE;
7058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->strength = (UColAttributeValue)coll->options->strength;
7059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value <= UCOL_IDENTICAL) {
7060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->strengthisDefault = FALSE;
7061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->strength = value;
7062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
7063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
7064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
7066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_ATTRIBUTE_COUNT:
7067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
7068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
7069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
7070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) {
7072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneRegenTable = TRUE;
7073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
7074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneRegenTable = FALSE;
7075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucol_updateInternalState(coll, status);
7077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UColAttributeValue  U_EXPORT2
7080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
7081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
7082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return UCOL_DEFAULT;
7083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(attr) {
7085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_NUMERIC_COLLATION:
7086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return coll->numericCollation;
7087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_HIRAGANA_QUATERNARY_MODE:
7088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return coll->hiraganaQ;
7089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
7090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->frenchCollation;
7091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
7092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->alternateHandling;
7093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
7094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->caseFirst;
7095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_CASE_LEVEL: /* do we have an extra case level */
7096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->caseLevel;
7097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
7098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->normalizationMode;
7099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_STRENGTH:         /* attribute for strength */
7100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->strength;
7101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_ATTRIBUTE_COUNT:
7102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
7103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
7104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
7105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return UCOL_DEFAULT;
7107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
7110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setStrength(    UCollator                *coll,
7111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UCollationStrength        strength)
7112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
7113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
7114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
7115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationStrength U_EXPORT2
7118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getStrength(const UCollator *coll)
7119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
7120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
7121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
7122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
712427f654740f2a26ad62a5c155af9199af9e69b889clairehoU_INTERNAL int32_t U_EXPORT2
712527f654740f2a26ad62a5c155af9199af9e69b889clairehoucol_getReorderCodes(const UCollator *coll,
712627f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t *dest,
712727f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t destCapacity,
712827f654740f2a26ad62a5c155af9199af9e69b889claireho                    UErrorCode *pErrorCode) {
712927f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(*pErrorCode)) {
713027f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
713127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
713227f654740f2a26ad62a5c155af9199af9e69b889claireho
713327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
713427f654740f2a26ad62a5c155af9199af9e69b889claireho        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
713527f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
713627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
713727f654740f2a26ad62a5c155af9199af9e69b889claireho
713827f654740f2a26ad62a5c155af9199af9e69b889claireho    if (coll->reorderCodesLength > destCapacity) {
713927f654740f2a26ad62a5c155af9199af9e69b889claireho        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
714027f654740f2a26ad62a5c155af9199af9e69b889claireho        return coll->reorderCodesLength;
714127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
714227f654740f2a26ad62a5c155af9199af9e69b889claireho    for (int32_t i = 0; i < coll->reorderCodesLength; i++) {
714327f654740f2a26ad62a5c155af9199af9e69b889claireho        dest[i] = coll->reorderCodes[i];
714427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
714527f654740f2a26ad62a5c155af9199af9e69b889claireho    return coll->reorderCodesLength;
714627f654740f2a26ad62a5c155af9199af9e69b889claireho}
714727f654740f2a26ad62a5c155af9199af9e69b889claireho
714827f654740f2a26ad62a5c155af9199af9e69b889clairehoU_INTERNAL void U_EXPORT2
714927f654740f2a26ad62a5c155af9199af9e69b889clairehoucol_setReorderCodes(UCollator *coll,
715027f654740f2a26ad62a5c155af9199af9e69b889claireho                    const int32_t *reorderCodes,
715127f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t reorderCodesLength,
715227f654740f2a26ad62a5c155af9199af9e69b889claireho                    UErrorCode *pErrorCode) {
715327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(*pErrorCode)) {
715427f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
715527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
715627f654740f2a26ad62a5c155af9199af9e69b889claireho
715727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (reorderCodesLength < 0 || (reorderCodesLength > 0 && reorderCodes == NULL)) {
715827f654740f2a26ad62a5c155af9199af9e69b889claireho        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
715927f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
716027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
716127f654740f2a26ad62a5c155af9199af9e69b889claireho
716227f654740f2a26ad62a5c155af9199af9e69b889claireho    uprv_free(coll->reorderCodes);
716327f654740f2a26ad62a5c155af9199af9e69b889claireho    coll->reorderCodes = NULL;
716427f654740f2a26ad62a5c155af9199af9e69b889claireho    coll->reorderCodesLength = 0;
716527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (reorderCodesLength == 0) {
716627f654740f2a26ad62a5c155af9199af9e69b889claireho        uprv_free(coll->leadBytePermutationTable);
716727f654740f2a26ad62a5c155af9199af9e69b889claireho        coll->leadBytePermutationTable = NULL;
716827f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
716927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
717027f654740f2a26ad62a5c155af9199af9e69b889claireho    coll->reorderCodes = (int32_t*) uprv_malloc(reorderCodesLength * sizeof(int32_t));
717127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (coll->reorderCodes == NULL) {
717227f654740f2a26ad62a5c155af9199af9e69b889claireho        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
717327f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
717427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
717527f654740f2a26ad62a5c155af9199af9e69b889claireho    for (int32_t i = 0; i < reorderCodesLength; i++) {
717627f654740f2a26ad62a5c155af9199af9e69b889claireho        coll->reorderCodes[i] = reorderCodes[i];
717727f654740f2a26ad62a5c155af9199af9e69b889claireho    }
717827f654740f2a26ad62a5c155af9199af9e69b889claireho    coll->reorderCodesLength = reorderCodesLength;
717927f654740f2a26ad62a5c155af9199af9e69b889claireho    ucol_buildPermutationTable(coll, pErrorCode);
718027f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(*pErrorCode)) {
718127f654740f2a26ad62a5c155af9199af9e69b889claireho        uprv_free(coll->reorderCodes);
718227f654740f2a26ad62a5c155af9199af9e69b889claireho        coll->reorderCodes = NULL;
718327f654740f2a26ad62a5c155af9199af9e69b889claireho        coll->reorderCodesLength = 0;
718427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
718527f654740f2a26ad62a5c155af9199af9e69b889claireho}
718627f654740f2a26ad62a5c155af9199af9e69b889claireho
718727f654740f2a26ad62a5c155af9199af9e69b889claireho
/****************************************************************************/
/* The following are miscellaneous functions:                               */
/* some are new APIs and some are compatibility APIs.                       */
/****************************************************************************/
7192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
7194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getVersion(const UCollator* coll,
7195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UVersionInfo versionInfo)
7196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
7197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* RunTime version  */
7198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t rtVersion = UCOL_RUNTIME_VERSION;
7199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Builder version*/
7200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t bdVersion = coll->image->version[0];
7201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Charset Version. Need to get the version from cnv files
7203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * makeconv should populate cnv files with version and
7204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * an api has to be provided in ucnv.h to obtain this version
7205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
7206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t csVersion = 0;
7207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* combine the version info */
7209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t cmbVersion = (uint16_t)((rtVersion<<11) | (bdVersion<<6) | (csVersion));
7210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Tailoring rules */
7212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    versionInfo[0] = (uint8_t)(cmbVersion>>8);
7213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    versionInfo[1] = (uint8_t)cmbVersion;
7214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    versionInfo[2] = coll->image->version[1];
7215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(coll->UCA) {
7216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        /* Include the minor number when getting the UCA version. (major & 1f) << 3 | (minor & 7) */
7217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        versionInfo[3] = (coll->UCA->image->UCAVersion[0] & 0x1f) << 3 | (coll->UCA->image->UCAVersion[1] & 0x07);
7218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
7219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        versionInfo[3] = 0;
7220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This internal API checks whether a character is tailored or not */
7225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool  U_EXPORT2
7226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) {
7227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL || coll == coll->UCA) {
7228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
7229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t CE = UCOL_NOT_FOUND;
7232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *ContractionStart = NULL;
7233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(u < 0x100) { /* latin-1 */
7234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CE = coll->latinOneMapping[u];
7235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) {
7236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
7237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else { /* regular */
7239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u);
7240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(isContraction(CE)) {
7243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ContractionStart = (UChar *)coll->image+getContractOffset(CE);
7244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex));
7245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UBool)(CE != UCOL_NOT_FOUND);
7248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/****************************************************************************/
/* The following are the string comparison functions                        */
/*                                                                          */
/****************************************************************************/
7255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  ucol_checkIdent    internal function.  Does byte level string compare.   */
7258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                     Used by strcoll if strength == identical and strings  */
725950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*                     are otherwise equal.                                  */
7260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                           */
7261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                     Comparison must be done on NFD normalized strings.    */
7262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                     FCD is not good enough.                               */
7263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
7265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUCollationResult    ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status)
7266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
726750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // When we arrive here, we can have normal strings or UCharIterators. Currently they are both
726850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // of same type, but that doesn't really mean that it will stay that way.
7269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t            comparison;
7270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sColl->flags & UCOL_USE_ITERATOR) {
727250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // The division for the array length may truncate the array size to
727350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
727450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // for all platforms anyway.
727550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
727650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
7277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UNormIterator *sNIt = NULL, *tNIt = NULL;
7278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
7279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
7280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sColl->iterator->move(sColl->iterator, 0, UITER_START);
7281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tColl->iterator->move(tColl->iterator, 0, UITER_START);
7282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, status);
7283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, status);
7284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        comparison = u_strCompareIter(sIt, tIt, TRUE);
7285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(sNIt);
7286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(tNIt);
7287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
728850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t sLen      = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp - sColl->string) : -1;
728950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *sBuf = sColl->string;
729050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t tLen      = (tColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(tColl->endp - tColl->string) : -1;
729150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *tBuf = tColl->string;
7292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (normalize) {
7294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_ZERO_ERROR;
729550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Note: We could use Normalizer::compare() or similar, but for short strings
729650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // which may not be in FCD it might be faster to just NFD them.
729750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather than
729850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // NFD'ing immediately might be faster for long strings,
729950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // but string comparison is usually done on relatively short strings.
730050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN) == 0, sBuf, sLen),
730150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  sColl->writableBuffer,
730250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  *status);
730350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN) == 0, tBuf, tLen),
730450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  tColl->writableBuffer,
730550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  *status);
730650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(U_FAILURE(*status)) {
730750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return UCOL_LESS;
7308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
730950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writableBuffer);
7310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
731150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE);
7312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (comparison < 0) {
7316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_LESS;
7317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (comparison == 0) {
7318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_EQUAL;
7319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* comparison > 0 */ {
7320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_GREATER;
7321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*  CEBuf - A growable buffer of collation elements (CEs), together with     */
/*          some inline helper functions, used to save CEs within            */
/*          ucol_strcoll                                                     */

/* Number of CEs that fit into the storage embedded in the struct itself. */
#define UCOL_CEBUF_SIZE 512

struct ucol_CEBuf {
    uint32_t    *buf;                           /* start of the storage currently in use     */
    uint32_t    *endp;                          /* one past the last slot of that storage    */
    uint32_t    *pos;                           /* slot that receives the next CE            */
    uint32_t     localArray[UCOL_CEBUF_SIZE];   /* embedded storage used until a buffer is allocated */
};
typedef struct ucol_CEBuf ucol_CEBuf;
7334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
7337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
7338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (b)->buf = (b)->pos = (b)->localArray;
7339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (b)->endp = (b)->buf + UCOL_CEBUF_SIZE;
7340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
7343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) {
7344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t  oldSize;
7345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t  newSize;
7346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t  *newBuf;
7347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ci->flags |= UCOL_ITER_ALLOCATED;
734950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    oldSize = (uint32_t)(b->pos - b->buf);
7350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    newSize = oldSize * 2;
7351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t));
7352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(newBuf == NULL) {
7353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_MEMORY_ALLOCATION_ERROR;
7354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    else {
7356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t));
7357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (b->buf != b->localArray) {
7358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_free(b->buf);
7359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        b->buf = newBuf;
7361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        b->endp = b->buf + newSize;
7362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        b->pos  = b->buf + oldSize;
7363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
7367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruinline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCode *status) {
7368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (b->pos == b->endp) {
7369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ucol_CEBuf_Expand(b, ci, status);
7370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_SUCCESS(*status)) {
7372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *(b)->pos++ = ce;
7373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is a trick string compare function that goes in and uses sortkeys to compare */
7377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* It is used when compare gets in trouble and needs to bail out                     */
7378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
7379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                  collIterate *tColl,
7380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                  UErrorCode *status)
7381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
7382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER];
7383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *sourceKeyP = sourceKey;
7384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *targetKeyP = targetKey;
7385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sourceKeyLen = UCOL_MAX_BUFFER, targetKeyLen = UCOL_MAX_BUFFER;
7386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UCollator *coll = sColl->coll;
738750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *source = NULL;
738850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *target = NULL;
7389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result = UCOL_EQUAL;
739050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString sourceString, targetString;
739150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t sourceLength;
739250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t targetLength;
7393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(sColl->flags & UCOL_USE_ITERATOR) {
7395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sColl->iterator->move(sColl->iterator, 0, UITER_START);
7396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        tColl->iterator->move(tColl->iterator, 0, UITER_START);
739750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
739850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while((c=sColl->iterator->next(sColl->iterator))>=0) {
739950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            sourceString.append((UChar)c);
740050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
740150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while((c=tColl->iterator->next(tColl->iterator))>=0) {
740250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            targetString.append((UChar)c);
740350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
740450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        source = sourceString.getBuffer();
740550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sourceLength = sourceString.length();
740650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        target = targetString.getBuffer();
740750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        targetLength = targetString.length();
7408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else { // no iterators
740950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sColl->string):-1;
741050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tColl->string):-1;
7411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source = sColl->string;
7412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        target = tColl->string;
7413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
7418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(sourceKeyLen > UCOL_MAX_BUFFER) {
7419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sourceKeyP = (uint8_t*)uprv_malloc(sourceKeyLen*sizeof(uint8_t));
7420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(sourceKeyP == NULL) {
7421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
7422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto cleanup_and_do_compare;
7423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
7425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
7428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(targetKeyLen > UCOL_MAX_BUFFER) {
7429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        targetKeyP = (uint8_t*)uprv_malloc(targetKeyLen*sizeof(uint8_t));
7430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(targetKeyP == NULL) {
7431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
7432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto cleanup_and_do_compare;
7433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
7435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result = uprv_strcmp((const char*)sourceKeyP, (const char*)targetKeyP);
7438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querucleanup_and_do_compare:
7440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(sourceKeyP != NULL && sourceKeyP != sourceKey) {
7441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(sourceKeyP);
7442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(targetKeyP != NULL && targetKeyP != targetKey) {
7445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(targetKeyP);
7446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(result<0) {
7449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_LESS;
7450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(result>0) {
7451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_GREATER;
7452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
7453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_EQUAL;
7454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
745850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult
745950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status)
7460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
7461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ALIGN_CODE(16);
7462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UCollator *coll = sColl->coll;
7464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // setting up the collator parameters
7467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UColAttributeValue strength = coll->strength;
7468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool initialCheckSecTer = (strength  >= UCOL_SECONDARY);
7469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkSecTer = initialCheckSecTer;
7471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkTertiary = (strength  >= UCOL_TERTIARY);
7472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkQuad = (strength  >= UCOL_QUATERNARY);
7473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkIdent = (strength == UCOL_IDENTICAL);
7474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkCase = (coll->caseLevel == UCOL_ON);
7475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer;
7476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
7477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool qShifted = shifted && checkQuad;
7478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad;
7479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(doHiragana && shifted) {
7481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return (ucol_compareUsingSortKeys(sColl, tColl, status));
7482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseSwitch = coll->caseSwitch;
7484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryMask = coll->tertiaryMask;
7485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This is the lowest primary value that will not be ignored if shifted
7487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0;
7488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollationResult result = UCOL_EQUAL;
7490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollationResult hirResult = UCOL_EQUAL;
7491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Preparing the CE buffers. They will be filled during the primary phase
7493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucol_CEBuf   sCEs;
7494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucol_CEBuf   tCEs;
7495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCOL_INIT_CEBUF(&sCEs);
7496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCOL_INIT_CEBUF(&tCEs);
7497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t secS = 0, secT = 0;
7499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sOrder=0, tOrder=0;
7500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Non shifted primary processing is quite simple
7502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!shifted) {
7503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
7504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // We fetch CEs until we hit a non ignorable primary or end.
7506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            do {
7507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // We get the next CE
7508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder = ucol_IGetNextCE(coll, sColl, status);
7509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Stuff it in the buffer
7510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // And keep just the primary part.
7512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sOrder &= UCOL_PRIMARYMASK;
7513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } while(sOrder == 0);
7514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // see the comments on the above block
7516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            do {
7517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder = ucol_IGetNextCE(coll, tColl, status);
7518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder &= UCOL_PRIMARYMASK;
7520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } while(tOrder == 0);
7521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // if both primaries are the same
7523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sOrder == tOrder) {
7524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // and there are no more CEs, we advance to the next level
7525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
7526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(doHiragana && hirResult == UCOL_EQUAL) {
7529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) {
7530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA))
7531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            ? UCOL_LESS:UCOL_GREATER;
7532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
753527f654740f2a26ad62a5c155af9199af9e69b889claireho                // only need to check one for continuation
753627f654740f2a26ad62a5c155af9199af9e69b889claireho                // if one is then the other must be or the preceding CE would be a prefix of the other
753727f654740f2a26ad62a5c155af9199af9e69b889claireho                if (coll->leadBytePermutationTable != NULL && !isContinuation(sOrder)) {
753827f654740f2a26ad62a5c155af9199af9e69b889claireho                    sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
753927f654740f2a26ad62a5c155af9199af9e69b889claireho                    tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
754027f654740f2a26ad62a5c155af9199af9e69b889claireho                }
7541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // if two primaries are different, we are done
7542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = (sOrder < tOrder) ?  UCOL_LESS: UCOL_GREATER;
7543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } // no primary difference... do the rest from the buffers
7546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // shifted - do a slightly more complicated processing :)
7547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
7548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UBool sInShifted = FALSE;
7549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UBool tInShifted = FALSE;
7550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // This version of code can be refactored. However, it seems easier to understand this way.
7551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Source loop. Same as the target loop.
7552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder = ucol_IGetNextCE(coll, sColl, status);
7554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == UCOL_NO_MORE_CES) {
7555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
7558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* UCA amendment - ignore ignorables that follow shifted code points */
7559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(isContinuation(sOrder)) {
7561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
7562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(sInShifted) {
7563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
7564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            break;
7569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else { /* Just lower level values */
7571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(sInShifted) {
7572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* regular */
757927f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(coll->leadBytePermutationTable != NULL){
758027f654740f2a26ad62a5c155af9199af9e69b889claireho                        sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
758127f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
7582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((sOrder & UCOL_PRIMARYMASK) > LVT) {
7583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if((sOrder & UCOL_PRIMARYMASK) > 0) {
7587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sInShifted = TRUE;
7588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sOrder &= UCOL_PRIMARYMASK;
7589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sInShifted = FALSE;
7594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sOrder &= UCOL_PRIMARYMASK;
7600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sInShifted = FALSE;
7601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder = ucol_IGetNextCE(coll, tColl, status);
7604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tOrder == UCOL_NO_MORE_CES) {
7605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
7608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* UCA amendment - ignore ignorables that follow shifted code points */
7609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(isContinuation(tOrder)) {
7611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
7612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(tInShifted) {
7613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
7614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            break;
7619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else { /* Just lower level values */
7621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(tInShifted) {
7622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* regular */
762927f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(coll->leadBytePermutationTable != NULL){
763027f654740f2a26ad62a5c155af9199af9e69b889claireho                        tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
763127f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
7632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((tOrder & UCOL_PRIMARYMASK) > LVT) {
7633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if((tOrder & UCOL_PRIMARYMASK) > 0) {
7637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tInShifted = TRUE;
7638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tOrder &= UCOL_PRIMARYMASK;
7639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tInShifted = FALSE;
7644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tOrder &= UCOL_PRIMARYMASK;
7650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tInShifted = FALSE;
7651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sOrder == tOrder) {
7653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
7654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(doHiragana && hirResult == UCOL_EQUAL) {
7655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_HIRAGANA)) {
7656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA))
7657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                ? UCOL_LESS:UCOL_GREATER;
7658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
7661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
7662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = 0;
7665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = 0;
7666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
7669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER;
7670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } /* no primary difference... do the rest from the buffers */
7673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* now, we're gonna reexamine collected CEs */
7676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t    *sCE;
7677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t    *tCE;
7678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* This is the secondary level of comparison */
7680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(checkSecTer) {
7681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(!isFrenchSec) { /* normal */
7682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sCE = sCEs.buf;
7683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tCE = tCEs.buf;
7684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (secS == 0) {
7686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = *(sCE++) & UCOL_SECONDARYMASK;
7687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(secT == 0) {
7690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT = *(tCE++) & UCOL_SECONDARYMASK;
7691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(secS == secT) {
7694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(secS == UCOL_NO_MORE_CES_SECONDARY) {
7695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = 0; secT = 0;
7698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        continue;
7699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
7702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto commonReturn;
7703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else { /* do the French */
7706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t *sCESave = NULL;
7707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t *tCESave = NULL;
7708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sCE = sCEs.pos-2; /* this could also be sCEs-- if needs to be optimized */
7709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tCE = tCEs.pos-2;
7710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (secS == 0 && sCE >= sCEs.buf) {
771227f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(sCESave == NULL) {
7713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = *(sCE--);
7714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(isContinuation(secS)) {
7715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while(isContinuation(secS = *(sCE--)))
7716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                ;
7717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* after this, secS has the start of continuation, and sCE points before that */
7718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sCESave = sCE; /* we save it, so that we know where to come back AND that we need to go forward */
7719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sCE+=2;  /* need to point to the first continuation CP */
7720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* However, now you can just continue doing stuff */
7721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = *(sCE++);
7724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(!isContinuation(secS)) { /* This means we have finished with this cont */
7725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sCE = sCESave;            /* reset the pointer to before continuation */
772627f654740f2a26ad62a5c155af9199af9e69b889claireho                            sCESave = NULL;
772727f654740f2a26ad62a5c155af9199af9e69b889claireho                            secS = 0;  /* Fetch a fresh CE before the continuation sequence. */
7728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS &= UCOL_SECONDARYMASK; /* remove the continuation bit */
7732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(secT == 0 && tCE >= tCEs.buf) {
773527f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(tCESave == NULL) {
7736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT = *(tCE--);
7737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(isContinuation(secT)) {
7738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while(isContinuation(secT = *(tCE--)))
7739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                ;
7740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* after this, secT has the start of continuation, and tCE points before that */
7741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tCESave = tCE; /* we save it, so that we know where to come back AND that we need to go forward */
7742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tCE+=2;  /* need to point to the first continuation CP */
7743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* However, now you can just continue doing stuff */
7744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT = *(tCE++);
7747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(!isContinuation(secT)) { /* This means we have finished with this cont */
7748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tCE = tCESave;          /* reset the pointer to before continuation */
774927f654740f2a26ad62a5c155af9199af9e69b889claireho                            tCESave = NULL;
775027f654740f2a26ad62a5c155af9199af9e69b889claireho                            secT = 0;  /* Fetch a fresh CE before the continuation sequence. */
7751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT &= UCOL_SECONDARYMASK; /* remove the continuation bit */
7755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(secS == secT) {
7758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(secS == UCOL_NO_MORE_CES_SECONDARY || (sCE < sCEs.buf && tCE < tCEs.buf)) {
7759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = 0; secT = 0;
7762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        continue;
7763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
7766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto commonReturn;
7767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* doing the case bit */
7773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(checkCase) {
7774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sCE = sCEs.buf;
7775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tCE = tCEs.buf;
7776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
7777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while((secS & UCOL_REMOVE_CASE) == 0) {
7778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isContinuation(*sCE++)) {
7779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS =*(sCE-1);
7780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(((secS & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
7781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // primary ignorables should not be considered on the case level when the strength is primary
7782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // otherwise, the CEs stop being well-formed
7783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS &= UCOL_TERT_CASE_MASK;
7784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS ^= caseSwitch;
7785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = 0;
7787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = 0;
7790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while((secT & UCOL_REMOVE_CASE) == 0) {
7794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isContinuation(*tCE++)) {
7795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT = *(tCE-1);
7796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(((secT & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
7797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // primary ignorables should not be considered on the case level when the strength is primary
7798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // otherwise, the CEs stop being well-formed
7799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT &= UCOL_TERT_CASE_MASK;
7800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT ^= caseSwitch;
7801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT = 0;
7803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT = 0;
7806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) {
7810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = UCOL_LESS;
7811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK)) {
7813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = UCOL_GREATER;
7814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY || (secT & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) {
7818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
7819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secS = 0;
7821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secT = 0;
7822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Tertiary level */
7827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(checkTertiary) {
7828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secS = 0;
7829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secT = 0;
7830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sCE = sCEs.buf;
7831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tCE = tCEs.buf;
7832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
7833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while((secS & UCOL_REMOVE_CASE) == 0) {
7834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secS = *(sCE++) & tertiaryMask;
7835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isContinuation(secS)) {
7836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS ^= caseSwitch;
7837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS &= UCOL_REMOVE_CASE;
7839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while((secT & UCOL_REMOVE_CASE)  == 0) {
7843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secT = *(tCE++) & tertiaryMask;
7844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isContinuation(secT)) {
7845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT ^= caseSwitch;
7846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT &= UCOL_REMOVE_CASE;
7848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(secS == secT) {
7852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if((secS & UCOL_REMOVE_CASE) == 1) {
7853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = 0; secT = 0;
7856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
7860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(qShifted /*checkQuad*/) {
7867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UBool sInShifted = TRUE;
7868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UBool tInShifted = TRUE;
7869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secS = 0;
7870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secT = 0;
7871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sCE = sCEs.buf;
7872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tCE = tCEs.buf;
7873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
787427f654740f2a26ad62a5c155af9199af9e69b889claireho            while((secS == 0 && secS != UCOL_NO_MORE_CES) || (isContinuation(secS) && !sInShifted)) {
7875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secS = *(sCE++);
7876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(isContinuation(secS)) {
7877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!sInShifted) {
7878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        continue;
7879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(secS > LVT || (secS & UCOL_PRIMARYMASK) == 0) { /* non continuation */
7881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = UCOL_PRIMARYMASK;
7882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sInShifted = FALSE;
7883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sInShifted = TRUE;
7885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            secS &= UCOL_PRIMARYMASK;
7888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
789027f654740f2a26ad62a5c155af9199af9e69b889claireho            while((secT == 0 && secT != UCOL_NO_MORE_CES) || (isContinuation(secT) && !tInShifted)) {
7891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secT = *(tCE++);
7892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(isContinuation(secT)) {
7893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!tInShifted) {
7894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        continue;
7895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(secT > LVT || (secT & UCOL_PRIMARYMASK) == 0) {
7897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT = UCOL_PRIMARYMASK;
7898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tInShifted = FALSE;
7899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tInShifted = TRUE;
7901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            secT &= UCOL_PRIMARYMASK;
7904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(secS == secT) {
7906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(secS == UCOL_NO_MORE_CES_PRIMARY) {
7907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = 0; secT = 0;
7910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
7914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(doHiragana && hirResult != UCOL_EQUAL) {
7918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // If we're fine on quaternaries, we might be different
7919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // on Hiragana. This, however, might fail us in shifted.
7920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result = hirResult;
7921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        goto commonReturn;
7922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*  For IDENTICAL comparisons, we use a bitwise character comparison */
7925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*  as a tiebreaker if all else is equal.                                */
7926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*  Getting here  should be quite rare - strings are not identical -     */
7927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*     that is checked first, but compared == through all other checks.  */
7928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(checkIdent)
7929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
7930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //result = ucol_checkIdent(&sColl, &tColl, coll->normalizationMode == UCOL_ON);
7931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = ucol_checkIdent(sColl, tColl, TRUE, status);
7932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucommonReturn:
7935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((sColl->flags | tColl->flags) & UCOL_ITER_ALLOCATED) {
7936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (sCEs.buf != sCEs.localArray ) {
7937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(sCEs.buf);
7938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (tCEs.buf != tCEs.localArray ) {
7940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(tCEs.buf);
7941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
7945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
794750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult
794850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoucol_strcollRegular(const UCollator *coll,
794950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *source, int32_t sourceLength,
795050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *target, int32_t targetLength,
795150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UErrorCode *status) {
795250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    collIterate sColl, tColl;
795350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Preparing the context objects for iterating over strings
795450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, source, sourceLength, &sColl, status);
795550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, target, targetLength, &tColl, status);
795650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
795750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return UCOL_LESS;
795850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
795950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return ucol_strcollRegular(&sColl, &tColl, status);
796050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
7961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline uint32_t
7963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
7964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                          uint32_t CE, const UChar *s, int32_t *index, int32_t len)
7965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
7966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);
7967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t latinOneOffset = (CE & 0x00FFF000) >> 12;
7968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t offset = 1;
7969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar schar = 0, tchar = 0;
7970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for(;;) {
7972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(len == -1) {
7973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(s[*index] == 0) { // end of string
7974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
7975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                schar = s[*index];
7977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
7979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(*index == len) {
7980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
7981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                schar = s[*index];
7983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(schar > (tchar = *(UCharOffset+offset))) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
7987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            offset++;
7988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (schar == tchar) {
7991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            (*index)++;
7992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset+offset]);
7993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        else
7995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
7996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(schar & 0xFF00 /*> UCOL_ENDOFLATIN1RANGE*/) {
7997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return UCOL_BAIL_OUT_CE;
7998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // skip completely ignorables
8000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
8001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(isZeroCE == 0) { // we have to ignore completely ignorables
8002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                (*index)++;
8003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                continue;
8004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
8007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
8008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * This is a fast strcoll, geared towards text in Latin-1.
 * It supports contractions of size two, French secondaries
 * and case switching. You can use it with strengths primary
 * to tertiary. It does not support shifted and case level.
 * It relies on the table built by setupLatin1Table. If it
 * doesn't understand something, it will go to the regular
 * strcoll.
 */
802150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult
8022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcollUseLatin1( const UCollator    *coll,
8023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UChar        *source,
8024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t            sLen,
8025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UChar        *target,
8026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t            tLen,
8027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode *status)
8028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
8029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ALIGN_CODE(16);
8030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t strength = coll->strength;
8031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sIndex = 0, tIndex = 0;
8033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar sChar = 0, tChar = 0;
8034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sOrder=0, tOrder=0;
8035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool endOfSource = FALSE;
8037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t *elements = coll->latinOneCEs;
8039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool haveContractions = FALSE; // if we have contractions in our string
8041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    // we cannot do French secondary
8042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Do the primary level
8044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(;;) {
8045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(sOrder==0) { // this loop skips primary ignorables
8046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // sOrder=getNextlatinOneCE(source);
8047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sLen==-1) {   // handling zero terminated strings
8048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sChar=source[sIndex++];
8049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sChar==0) {
8050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endOfSource = TRUE;
8051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
8052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {        // handling strings with known length
8054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sIndex==sLen) {
8055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endOfSource = TRUE;
8056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
8057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sChar=source[sIndex++];
8059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
8061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //fprintf(stderr, "R");
806250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
8063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sOrder = elements[sChar];
8065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sOrder >= UCOL_NOT_FOUND) { // if we got a special
8066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // specials can basically be either contractions or bail-out signs. If we get anything
8067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // else, we'll bail out anywasy
8068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(getCETag(sOrder) == CONTRACTION_TAG) {
8069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOrder, source, &sIndex, sLen);
8070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    haveContractions = TRUE; // if there are contractions, we cannot do French secondary
8071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // However, if there are contractions in the table, but we always use just one char,
8072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // we might be able to do French. This should be checked out.
8073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) {
8075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //fprintf(stderr, "S");
807650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
8077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
8080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(tOrder==0) {  // this loop skips primary ignorables
8082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // tOrder=getNextlatinOneCE(target);
8083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(tLen==-1) {    // handling zero terminated strings
8084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tChar=target[tIndex++];
8085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tChar==0) {
8086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(endOfSource) { // this is different than source loop,
8087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // as we already know that source loop is done here,
8088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // so we can either finish the primary loop if both
8089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // strings are done or anounce the result if only
8090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // target is done. Same below.
8091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto endOfPrimLoop;
8092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
8093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_GREATER;
8094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {          // handling strings with known length
8097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tIndex==tLen) {
8098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(endOfSource) {
8099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto endOfPrimLoop;
8100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
8101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_GREATER;
8102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tChar=target[tIndex++];
8105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
8107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //fprintf(stderr, "R");
810850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
8109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tOrder = elements[tChar];
8111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(tOrder >= UCOL_NOT_FOUND) {
8112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Handling specials, see the comments for source
8113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(getCETag(tOrder) == CONTRACTION_TAG) {
8114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOrder, target, &tIndex, tLen);
8115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    haveContractions = TRUE;
8116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) {
8118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //fprintf(stderr, "S");
811950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
8120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
8123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(endOfSource) { // source is finished, but target is not, say the result.
8124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return UCOL_LESS;
8125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
8126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(sOrder == tOrder) { // if we have same CEs, we continue the loop
8128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sOrder = 0; tOrder = 0;
8129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
8130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
8131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // compare current top bytes
8132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(((sOrder^tOrder)&0xFF000000)!=0) {
8133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // top bytes differ, return difference
8134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder < tOrder) {
8135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UCOL_LESS;
8136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(sOrder > tOrder) {
8137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UCOL_GREATER;
8138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
8140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // since we must return enum value
8141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // top bytes match, continue with following bytes
8144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sOrder<<=8;
8145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tOrder<<=8;
8146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruendOfPrimLoop:
8150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // after primary loop, we definitely know the sizes of strings,
8151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // so we set it and use simpler loop for secondaries and tertiaries
8152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    sLen = sIndex; tLen = tIndex;
8153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(strength >= UCOL_SECONDARY) {
8154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // adjust the table beggining
8155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        elements += coll->latinOneTableLen;
8156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        endOfSource = FALSE;
8157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->frenchCollation == UCOL_OFF) { // non French
8159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // This loop is a simplified copy of primary loop
8160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // at this point we know that whole strings are latin-1, so we don't
8161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // check for that. We also know that we only have contractions as
8162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // specials.
8163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sIndex = 0; tIndex = 0;
8164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
8165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(sOrder==0) {
8166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(sIndex==sLen) {
8167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        endOfSource = TRUE;
8168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
8169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sChar=source[sIndex++];
8171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = elements[sChar];
8172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(sOrder > UCOL_NOT_FOUND) {
8173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, sOrder, source, &sIndex, sLen);
8174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(tOrder==0) {
8178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tIndex==tLen) {
8179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(endOfSource) {
8180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto endOfSecLoop;
8181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
8182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_GREATER;
8183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
8184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tChar=target[tIndex++];
8186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = elements[tChar];
8187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tOrder > UCOL_NOT_FOUND) {
8188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, tOrder, target, &tIndex, tLen);
8189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(endOfSource) {
8192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UCOL_LESS;
8193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == tOrder) {
8196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = 0; tOrder = 0;
8197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
8198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
8199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // see primary loop for comments on this
8200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(((sOrder^tOrder)&0xFF000000)!=0) {
8201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(sOrder < tOrder) {
8202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_LESS;
8203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if(sOrder > tOrder) {
8204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_GREATER;
8205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
8206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder<<=8;
8208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder<<=8;
8209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else { // French
8212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(haveContractions) { // if we have contractions, we have to bail out
8213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // since we don't really know how to handle them here
821450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
8215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // For French, we go backwards
8217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sIndex = sLen; tIndex = tLen;
8218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
8219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(sOrder==0) {
8220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(sIndex==0) {
8221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        endOfSource = TRUE;
8222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
8223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sChar=source[--sIndex];
8225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = elements[sChar];
8226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // don't even look for contractions
8227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(tOrder==0) {
8230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tIndex==0) {
8231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(endOfSource) {
8232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto endOfSecLoop;
8233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
8234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_GREATER;
8235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
8236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tChar=target[--tIndex];
8238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = elements[tChar];
8239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // don't even look for contractions
8240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(endOfSource) {
8242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UCOL_LESS;
8243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == tOrder) {
8246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = 0; tOrder = 0;
8247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
8248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
8249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // see the primary loop for comments
8250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(((sOrder^tOrder)&0xFF000000)!=0) {
8251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(sOrder < tOrder) {
8252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_LESS;
8253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if(sOrder > tOrder) {
8254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_GREATER;
8255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
8256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder<<=8;
8258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder<<=8;
8259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruendOfSecLoop:
8265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(strength >= UCOL_TERTIARY) {
8266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // tertiary loop is the same as secondary (except no French)
8267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        elements += coll->latinOneTableLen;
8268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sIndex = 0; tIndex = 0;
8269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        endOfSource = FALSE;
8270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
8271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while(sOrder==0) {
8272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sIndex==sLen) {
8273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endOfSource = TRUE;
8274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
8275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sChar=source[sIndex++];
8277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder = elements[sChar];
8278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder > UCOL_NOT_FOUND) {
8279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sOrder, source, &sIndex, sLen);
8280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while(tOrder==0) {
8283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tIndex==tLen) {
8284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(endOfSource) {
8285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_EQUAL; // if both strings are at the end, they are equal
8286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
8287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_GREATER;
8288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tChar=target[tIndex++];
8291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder = elements[tChar];
8292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tOrder > UCOL_NOT_FOUND) {
8293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tOrder, target, &tIndex, tLen);
8294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(endOfSource) {
8297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return UCOL_LESS;
8298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sOrder == tOrder) {
8300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder = 0; tOrder = 0;
8301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                continue;
8302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
8303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(((sOrder^tOrder)&0xff000000)!=0) {
8304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(sOrder < tOrder) {
8305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_LESS;
8306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else if(sOrder > tOrder) {
8307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_GREATER;
8308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
8309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
8310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder<<=8;
8311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder<<=8;
8312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return UCOL_EQUAL;
8316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationResult U_EXPORT2
8320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcollIter( const UCollator    *coll,
8321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 UCharIterator *sIter,
8322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 UCharIterator *tIter,
8323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 UErrorCode         *status)
8324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
8325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(!status || U_FAILURE(*status)) {
8326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
8327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
8330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
8331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (sIter == tIter) {
8333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
8334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
8335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(sIter == NULL || tIter == NULL || coll == NULL) {
8337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
8338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
8339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
8340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UCollationResult result = UCOL_EQUAL;
8343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Preparing the context objects for iterating over strings
8345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collIterate sColl, tColl;
834650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, NULL, -1, &sColl, status);
834750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, NULL, -1, &tColl, status);
834850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
834950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
835050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return UCOL_EQUAL;
835150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
8352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // The division for the array length may truncate the array size to
8353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
8354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // for all platforms anyway.
8355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
8356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
8357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UNormIterator *sNormIter = NULL, *tNormIter = NULL;
8358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    sColl.iterator = sIter;
8360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    sColl.flags |= UCOL_USE_ITERATOR;
8361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    tColl.flags |= UCOL_USE_ITERATOR;
8362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    tColl.iterator = tIter;
8363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) {
8365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
8366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status);
8367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sColl.flags &= ~UCOL_ITER_NORM;
8368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
8370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status);
8371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tColl.flags &= ~UCOL_ITER_NORM;
8372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;
8375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    while((sChar = sColl.iterator->next(sColl.iterator)) ==
8377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        (tChar = tColl.iterator->next(tColl.iterator))) {
8378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sChar == U_SENTINEL) {
8379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = UCOL_EQUAL;
8380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto end_compare;
8381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(sChar == U_SENTINEL) {
8385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tChar = tColl.iterator->previous(tColl.iterator);
8386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(tChar == U_SENTINEL) {
8389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sChar = sColl.iterator->previous(sColl.iterator);
8390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    sChar = sColl.iterator->previous(sColl.iterator);
8393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    tChar = tColl.iterator->previous(tColl.iterator);
8394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (ucol_unsafeCP((UChar)sChar, coll) || ucol_unsafeCP((UChar)tChar, coll))
8396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
8397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // We are stopped in the middle of a contraction.
8398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Scan backwards through the == part of the string looking for the start of the contraction.
8399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   It doesn't matter which string we scan, since they are the same in this region.
8400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        do
8401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
8402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sChar = sColl.iterator->previous(sColl.iterator);
8403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tChar = tColl.iterator->previous(tColl.iterator);
8404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
8405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll));
8406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_SUCCESS(*status)) {
8410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result = ucol_strcollRegular(&sColl, &tColl, status);
8411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruend_compare:
8414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(sNormIter || tNormIter) {
8415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(sNormIter);
8416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(tNormIter);
8417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_EXIT_VALUE_STATUS(result, *status)
8420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return result;
8421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                      */
8425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_strcoll     Main public API string comparison function          */
8426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                      */
8427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationResult U_EXPORT2
8428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcoll( const UCollator    *coll,
8429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UChar        *source,
8430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t            sourceLength,
8431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UChar        *target,
8432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru              int32_t            targetLength)
8433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
8434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ALIGN_CODE(16);
8435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
8437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
8438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
8439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
8440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
8441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(source == NULL || target == NULL) {
8444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // do not crash, but return. Should have
8445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // status argument to return error.
8446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_VALUE(UCOL_EQUAL);
8447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
8448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* Quick check if source and target are same strings. */
8451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* They should either both be NULL terminated or the explicit length should be set on both. */
8452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (source==target && sourceLength==targetLength) {
8453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_VALUE(UCOL_EQUAL);
8454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
8455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Scan the strings.  Find:                                                             */
8458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*    The length of any leading portion that is equal                                   */
8459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*    Whether they are exactly equal.  (in which case we just return)                   */
8460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar    *pSrc    = source;
8461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar    *pTarg   = target;
8462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t        equalLength;
8463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sourceLength == -1 && targetLength == -1) {
8465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Both strings are null terminated.
8466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //    Scan through any leading equal portion.
8467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while (*pSrc == *pTarg && *pSrc != 0) {
8468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pSrc++;
8469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pTarg++;
8470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (*pSrc == 0 && *pTarg == 0) {
8472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UTRACE_EXIT_VALUE(UCOL_EQUAL);
8473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return UCOL_EQUAL;
8474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
847550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        equalLength = (int32_t)(pSrc - source);
8476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else
8478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
8479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // One or both strings has an explicit length.
8480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar    *pSrcEnd = source + sourceLength;
8481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar    *pTargEnd = target + targetLength;
8482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Scan while the strings are bitwise ==, or until one is exhausted.
8484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for (;;) {
8485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (pSrc == pSrcEnd || pTarg == pTargEnd) {
8486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
8487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLength == -1)) {
8489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
8490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (*pSrc != *pTarg) {
8492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
8493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            pSrc++;
8495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            pTarg++;
8496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
849750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        equalLength = (int32_t)(pSrc - source);
8498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // If we made it all the way through both strings, we are done.  They are ==
8500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if ((pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0))  &&   /* At end of src string, however it was specified. */
8501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)))     /* and also at end of dest string                  */
8502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
8503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UTRACE_EXIT_VALUE(UCOL_EQUAL);
8504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return UCOL_EQUAL;
8505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
8506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (equalLength > 0) {
8508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* There is an identical portion at the beginning of the two strings.        */
8509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*   If the identical portion ends within a contraction or a comibining      */
8510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*   character sequence, back up to the start of that sequence.              */
8511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // These values should already be set by the code above.
8513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //pSrc  = source + equalLength;        /* point to the first differing chars   */
8514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //pTarg = target + equalLength;
851527f654740f2a26ad62a5c155af9199af9e69b889claireho        if ((pSrc  != source+sourceLength && ucol_unsafeCP(*pSrc, coll)) ||
851627f654740f2a26ad62a5c155af9199af9e69b889claireho            (pTarg != target+targetLength && ucol_unsafeCP(*pTarg, coll)))
8517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
8518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We are stopped in the middle of a contraction.
8519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Scan backwards through the == part of the string looking for the start of the contraction.
8520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   It doesn't matter which string we scan, since they are the same in this region.
8521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do
8522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
8523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                equalLength--;
8524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pSrc--;
8525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while (equalLength>0 && ucol_unsafeCP(*pSrc, coll));
8527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source += equalLength;
8530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        target += equalLength;
8531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (sourceLength > 0) {
8532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sourceLength -= equalLength;
8533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (targetLength > 0) {
8535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            targetLength -= equalLength;
8536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
8540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UCollationResult returnVal;
8541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!coll->latinOneUse || (sourceLength > 0 && *source&0xff00) || (targetLength > 0 && *target&0xff00)) {
854250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targetLength, &status);
8543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
8544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);
8545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_EXIT_VALUE(returnVal);
8547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return returnVal;
8548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */
8551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2
8552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_greater(    const    UCollator        *coll,
8553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar            *source,
8554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t            sourceLength,
8555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar            *target,
8556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t            targetLength)
8557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
8558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
8559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        == UCOL_GREATER);
8560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */
8563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2
8564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_greaterOrEqual(    const    UCollator    *coll,
8565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const    UChar        *source,
8566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t        sourceLength,
8567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const    UChar        *target,
8568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t        targetLength)
8569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
8570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
8571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        != UCOL_LESS);
8572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */
8575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2
8576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_equal(        const    UCollator        *coll,
8577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const    UChar            *source,
8578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t            sourceLength,
8579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const    UChar            *target,
8580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t            targetLength)
8581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
8582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
8583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        == UCOL_EQUAL);
8584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
8587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
8588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll && coll->UCA) {
8589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo));
8590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */
8594