1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 1996-2011, International Business Machines
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   file name:  ucol.cpp
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   encoding:   US-ASCII
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   tab size:   8 (not used)
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   indentation:4
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Modification history
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date        Name      Comments
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 1996-1999   various members of ICU team maintained C API for collation framework
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 02/16/2001  synwee    Added internal method getPrevSpecialCE
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/01/2001  synwee    Added maxexpansion functionality.
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestream.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/coleitr.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unorm.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_imp.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "bocsu.h"
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unorm_it.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h"
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h"
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "utracimp.h"
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "putilimp.h"
40c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "uassert.h"
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a single
 * primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LAST_BYTE_MASK_           0xFF
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SECOND_LAST_BYTE_SHIFT_   8
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define ZERO_CC_LIMIT_            0xC0
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// This is a static pointer to the normalizer's fcdTrieIndex.
// It is always the same between calls to u_cleanup,
// and therefore writing to it is not synchronized.
// It is reset in ucol_cleanup.
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const uint16_t *fcdTrieIndex=NULL;
60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Code points at fcdHighStart and above have a zero FCD value.
61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic UChar32 fcdHighStart = 0;
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// These are values from the UCA that are required for
// implicit generation and for suppressing sort key compression.
// They should normally come from the UCA; if one
// is running without the UCA, that could be a problem.
6727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const int32_t maxRegularPrimary  = 0x7A;
68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t minImplicitPrimary = 0xE0;
69c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t maxImplicitPrimary = 0xE4;
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool U_CALLCONV
73c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruucol_cleanup(void)
74c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
75c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fcdTrieIndex = NULL;
76c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return TRUE;
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t U_CALLCONV
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_getFoldingOffset(uint32_t data) {
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (int32_t)(data&0xFFFFFF);
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8627f654740f2a26ad62a5c155af9199af9e69b889claireho// init FCD data
8727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline
8827f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool initializeFCD(UErrorCode *status) {
8927f654740f2a26ad62a5c155af9199af9e69b889claireho    if (fcdTrieIndex != NULL) {
9027f654740f2a26ad62a5c155af9199af9e69b889claireho        return TRUE;
9127f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
9227f654740f2a26ad62a5c155af9199af9e69b889claireho        // The result is constant, until the library is reloaded.
9327f654740f2a26ad62a5c155af9199af9e69b889claireho        fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
9427f654740f2a26ad62a5c155af9199af9e69b889claireho        ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
9527f654740f2a26ad62a5c155af9199af9e69b889claireho        return U_SUCCESS(*status);
9627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
9727f654740f2a26ad62a5c155af9199af9e69b889claireho}
9827f654740f2a26ad62a5c155af9199af9e69b889claireho
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruinline void IInit_collIterate(const UCollator *collator, const UChar *sourceString,
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                              int32_t sourceLen, collIterate *s,
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                              UErrorCode *status)
103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    (s)->string = (s)->pos = sourceString;
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->origFlags = 0;
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->flags = 0;
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sourceLen >= 0) {
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s->flags |= UCOL_ITER_HASLEN;
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (s)->endp = (UChar *)sourceString+sourceLen;
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* change to enable easier checking for end of string for fcdpositon */
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (s)->endp = NULL;
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->extendCEs = NULL;
116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->extendCEsSize = 0;
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->CEpos = (s)->toReturn = (s)->CEs;
118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->offsetBuffer = NULL;
119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->offsetBufferSize = 0;
120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->offsetReturn = (s)->offsetStore = NULL;
121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    (s)->offsetRepeatCount = (s)->offsetRepeatValue = 0;
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->coll = (collator);
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    (s)->nfd = Normalizer2Factory::getNFDInstance(*status);
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->fcdPosition = 0;
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(collator->normalizationMode == UCOL_ON) {
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (s)->flags |= UCOL_ITER_NORM;
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) {
129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        (s)->flags |= UCOL_HIRAGANA_Q;
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (s)->iterator = NULL;
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //(s)->iteratorIndex = 0;
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void  U_EXPORT2
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_init_collIterate(const UCollator *collator, const UChar *sourceString,
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             int32_t sourceLen, collIterate *s,
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UErrorCode *status) {
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Out-of-line version for use from other files. */
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(collator, sourceString, sourceLen, s, status);
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI collIterate * U_EXPORT2
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_new_collIterate(UErrorCode *status) {
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    collIterate *s = new collIterate;
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(s == NULL) {
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *status = U_MEMORY_ALLOCATION_ERROR;
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return s;
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
15650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI void U_EXPORT2
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_delete_collIterate(collIterate *s) {
15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete s;
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
16150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UBool U_EXPORT2
16250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouprv_collIterateAtEnd(collIterate *s) {
16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return s == NULL || s->pos == s->endp;
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Backup the state of the collIterate struct data
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate to backup
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param backup storage
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void backupState(const collIterate *data, collIterateState *backup)
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->fcdPosition = data->fcdPosition;
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->flags       = data->flags;
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->origFlags   = data->origFlags;
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->pos         = data->pos;
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    backup->bufferaddress = data->writableBuffer.getBuffer();
17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    backup->buffersize    = data->writableBuffer.length();
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->iteratorMove = 0;
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    backup->iteratorIndex = 0;
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(data->iterator != NULL) {
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER_CURRENT);
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        backup->iteratorIndex = data->iterator->getState(data->iterator);
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // no we try to fixup if we're using a normalizing iterator and we get UITER_NO_STATE
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(backup->iteratorIndex == UITER_NO_STATE) {
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((backup->iteratorIndex = data->iterator->getState(data->iterator)) == UITER_NO_STATE) {
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                backup->iteratorMove++;
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->iterator->move(data->iterator, -1, UITER_CURRENT);
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Loads the state into the collIterate struct data
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate to backup
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param backup storage
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param forwards boolean to indicate if forwards iteration is used,
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*        false indicates backwards iteration
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void loadState(collIterate *data, const collIterateState *backup,
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UBool        forwards)
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags       = backup->flags;
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->origFlags   = backup->origFlags;
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(data->iterator != NULL) {
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO);
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->iterator->setState(data->iterator, backup->iteratorIndex, &status);
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(backup->iteratorMove != 0) {
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->pos         = backup->pos;
218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & UCOL_ITER_INNORMBUF) &&
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->writableBuffer.getBuffer() != backup->bufferaddress) {
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        this is when a new buffer has been reallocated and we'll have to
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        calculate the new position.
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        note the new buffer has to contain the contents of the old buffer.
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (forwards) {
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            data->pos = data->writableBuffer.getTerminatedBuffer() +
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         (data->pos - backup->bufferaddress);
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* backwards direction */
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t temp = backup->buffersize -
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  (int32_t)(data->pos - backup->bufferaddress);
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writableBuffer.length() - temp);
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        this is alittle tricky.
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if we are initially not in the normalization buffer, even if we
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        normalize in the later stage, the data in the buffer will be
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ignored, since we skip back up to the data string.
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        however if we are already in the normalization buffer, any
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        further normalization will pull data into the normalization
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buffer and modify the fcdPosition.
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        since we are keeping the data in the buffer for use, the
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fcdPosition can not be reverted back.
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        arrgghh....
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->fcdPosition = backup->fcdPosition;
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
25450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoreallocCEs(collIterate *data, int32_t newCapacity) {
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t *oldCEs = data->extendCEs;
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(oldCEs == NULL) {
25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCEs = data->CEs;
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length = data->CEpos - oldCEs;
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t *newCEs = (uint32_t *)uprv_malloc(newCapacity * 4);
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(newCEs == NULL) {
26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uprv_memcpy(newCEs, oldCEs, length * 4);
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uprv_free(data->extendCEs);
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->extendCEs = newCEs;
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->extendCEsSize = newCapacity;
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->CEpos = newCEs + length;
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
27350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoincreaseCEsCapacity(collIterate *data) {
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t oldCapacity;
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(data->extendCEs != NULL) {
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCapacity = data->extendCEsSize;
27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCapacity = LENGTHOF(data->CEs);
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return reallocCEs(data, 2 * oldCapacity);
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoensureCEsCapacity(collIterate *data, int32_t minCapacity) {
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t oldCapacity;
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(data->extendCEs != NULL) {
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCapacity = data->extendCEsSize;
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        oldCapacity = LENGTHOF(data->CEs);
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(minCapacity <= oldCapacity) {
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    oldCapacity *= 2;
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return reallocCEs(data, minCapacity > oldCapacity ? minCapacity : oldCapacity);
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
29927f654740f2a26ad62a5c155af9199af9e69b889clairehovoid collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) {
30027f654740f2a26ad62a5c155af9199af9e69b889claireho    if(U_FAILURE(errorCode)) {
30127f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
30227f654740f2a26ad62a5c155af9199af9e69b889claireho    }
30327f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t length = offsetStore == NULL ? 0 : (int32_t)(offsetStore - offsetBuffer);
30427f654740f2a26ad62a5c155af9199af9e69b889claireho    if(length >= offsetBufferSize) {
30527f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t newCapacity = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE;
30627f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t *newBuffer = reinterpret_cast<int32_t *>(uprv_malloc(newCapacity * 4));
30727f654740f2a26ad62a5c155af9199af9e69b889claireho        if(newBuffer == NULL) {
30827f654740f2a26ad62a5c155af9199af9e69b889claireho            errorCode = U_MEMORY_ALLOCATION_ERROR;
30927f654740f2a26ad62a5c155af9199af9e69b889claireho            return;
31027f654740f2a26ad62a5c155af9199af9e69b889claireho        }
31127f654740f2a26ad62a5c155af9199af9e69b889claireho        if(length > 0) {
31227f654740f2a26ad62a5c155af9199af9e69b889claireho            uprv_memcpy(newBuffer, offsetBuffer, length * 4);
31327f654740f2a26ad62a5c155af9199af9e69b889claireho        }
31427f654740f2a26ad62a5c155af9199af9e69b889claireho        uprv_free(offsetBuffer);
31527f654740f2a26ad62a5c155af9199af9e69b889claireho        offsetBuffer = newBuffer;
31627f654740f2a26ad62a5c155af9199af9e69b889claireho        offsetStore = offsetBuffer + length;
31727f654740f2a26ad62a5c155af9199af9e69b889claireho        offsetBufferSize = newCapacity;
31827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
31927f654740f2a26ad62a5c155af9199af9e69b889claireho    *offsetStore++ = offset;
32027f654740f2a26ad62a5c155af9199af9e69b889claireho}
32127f654740f2a26ad62a5c155af9199af9e69b889claireho
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collIter_eos()
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*     Checks for a collIterate being positioned at the end of
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*     its source string.
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_eos(collIterate *s) {
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s->flags & UCOL_USE_ITERATOR) {
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return !(s->iterator->hasNext(s->iterator));
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((s->flags & UCOL_ITER_HASLEN) == 0 && *s->pos != 0) {
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Null terminated string, but not at null, so not at end.
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   Whether in main or normalization buffer doesn't matter.
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // String with length.  Can't be in normalization buffer, which is always
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  null termintated.
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s->flags & UCOL_ITER_HASLEN) {
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (s->pos == s->endp);
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We are at a null termination, could be either normalization buffer or main string.
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((s->flags & UCOL_ITER_INNORMBUF) == 0) {
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // At null at end of main string.
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return TRUE;
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // At null at end of normalization buffer.  Need to check whether there there are
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   any characters left in the main buffer.
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s->origFlags & UCOL_USE_ITERATOR) {
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return !(s->iterator->hasNext(s->iterator));
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if ((s->origFlags & UCOL_ITER_HASLEN) == 0) {
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Null terminated main string.  fcdPosition is the 'return' position into main buf.
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (*s->fcdPosition == 0);
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Main string with an end pointer.
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return s->fcdPosition == s->endp;
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collIter_bos()
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*     Checks for a collIterate being positioned at the start of
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*     its source string.
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_bos(collIterate *source) {
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // if we're going backwards, we need to know whether there is more in the
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // iterator, even if we are in the side buffer
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) {
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return !source->iterator->hasPrevious(source->iterator);
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if (source->pos <= source->string ||
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      ((source->flags & UCOL_ITER_INNORMBUF) &&
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *(source->pos - 1) == 0 && source->fcdPosition == NULL)) {
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return FALSE;
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIter_SimpleBos(collIterate *source) {
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // if we're going backwards, we need to know whether there is more in the
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // iterator, even if we are in the side buffer
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) {
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return !source->iterator->hasPrevious(source->iterator);
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if (source->pos == source->string) {
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return FALSE;
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //return (data->pos == data->string) ||
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the open/close functions                                   */
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                          */
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UCollator*
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_initFromBinary(const uint8_t *bin, int32_t length,
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UCollator *base,
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UCollator *fillIn,
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UErrorCode *status)
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollator *result = fillIn;
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status)) {
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(base == NULL) {
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // we don't support null base yet
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We need these and we could be running without UCA
424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uprv_uca_initImplicitConstants(status);
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCATableHeader *colData = (UCATableHeader *)bin;
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // do we want version check here? We're trying to figure out whether collators are compatible
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0)) ||
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        colData->version[0] != UCOL_BUILDER_VERSION)
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_COLLATOR_VERSION_MISMATCH;
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = ucol_initCollator((const UCATableHeader *)bin, result, base, status);
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(U_FAILURE(*status)){
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result->hasRealData = TRUE;
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(base) {
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                result = ucol_initCollator(base->image, result, base, status);
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+((const UCATableHeader *)bin)->options), status);
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(U_FAILURE(*status)){
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                result->hasRealData = FALSE;
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else {
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *status = U_USELESS_COLLATOR_ERROR;
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result->freeImageOnClose = FALSE;
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->actualLocale = NULL;
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->validLocale = NULL;
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->requestedLocale = NULL;
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->rules = NULL;
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->rulesLength = 0;
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->freeRulesOnClose = FALSE;
464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->ucaRules = NULL;
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollator* U_EXPORT2
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_openBinary(const uint8_t *bin, int32_t length,
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UCollator *base,
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UErrorCode *status)
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ucol_initFromBinary(bin, length, base, NULL, status);
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruucol_cloneBinary(const UCollator *coll,
478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 uint8_t *buffer, int32_t capacity,
479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 UErrorCode *status)
480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t length = 0;
482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status)) {
483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return length;
484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(capacity < 0) {
486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return length;
488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll->hasRealData == TRUE) {
490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        length = coll->image->size;
491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(length <= capacity) {
492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(buffer, coll->image, length);
493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_BUFFER_OVERFLOW_ERROR;
495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(length <= capacity) {
499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* build the UCATableHeader with minimal entries */
500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* do not copy the header from the UCA file because its values are wrong! */
501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* reset everything */
504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memset(buffer, 0, length);
505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* set the tailoring-specific values */
507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UCATableHeader *myData = (UCATableHeader *)buffer;
508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->size = length;
509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* offset for the options, the only part of the data that is present after the header */
511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->options = sizeof(UCATableHeader);
512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* need to always set the expansion value for an upper bound of the options */
514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->expansion = myData->options + sizeof(UColOptionSet);
515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->magic = UCOL_HEADER_MAGIC;
517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->isBigEndian = U_IS_BIG_ENDIAN;
518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->charSetFamily = U_CHARSET_FAMILY;
519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* copy UCA's version; genrb will override all but the builder version with tailoring data */
521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            myData->jamoSpecial = coll->image->jamoSpecial;
527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* copy the collator options */
529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_BUFFER_OVERFLOW_ERROR;
532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return length;
535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollator* U_EXPORT2
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t * pBufferSize, UErrorCode *status)
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollator * localCollator;
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator);
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *stackBufferChars = (char *)stackBuffer;
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t imageSize = 0;
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t rulesSize = 0;
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t rulesPadding = 0;
546b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t defaultReorderCodesSize = 0;
547b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t reorderCodesSize = 0;
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *image;
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *rules;
550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t* defaultReorderCodes;
551b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t* reorderCodes;
552b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uint8_t* leadBytePermutationTable;
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool colAllocated = FALSE;
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool imageAllocated = FALSE;
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (status == NULL || U_FAILURE(*status)){
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((stackBuffer && !pBufferSize) || !coll){
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       *status = U_ILLEGAL_ARGUMENT_ERROR;
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
563b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (coll->rules && coll->freeRulesOnClose) {
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar);
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar));
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufferSizeNeeded += rulesSize + rulesPadding;
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
569b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // no padding for alignment needed from here since the next two are 4 byte quantities
570b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (coll->defaultReorderCodes) {
571b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        defaultReorderCodesSize = coll->defaultReorderCodesLength * sizeof(int32_t);
572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bufferSizeNeeded += defaultReorderCodesSize;
573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
574b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (coll->reorderCodes) {
575b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        reorderCodesSize = coll->reorderCodesLength * sizeof(int32_t);
576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bufferSizeNeeded += reorderCodesSize;
577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
578b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (coll->leadBytePermutationTable) {
579b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bufferSizeNeeded += 256 * sizeof(uint8_t);
580b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
581b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
582b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (stackBuffer && *pBufferSize <= 0) { /* 'preflighting' request - set needed size into *pBufferSize */
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pBufferSize =  bufferSizeNeeded;
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Pointers on 64-bit platforms need to be aligned
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * on a 64-bit boundry in memory.
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (*pBufferSize > offsetUp) {
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pBufferSize -= offsetUp;
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            stackBufferChars += offsetUp;
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pBufferSize = 1;
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    stackBuffer = (void *)stackBufferChars;
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (stackBuffer == NULL || *pBufferSize < bufferSizeNeeded) {
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* allocate one here...*/
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded);
606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Null pointer check.
607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (stackBufferChars == NULL) {
608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        colAllocated = TRUE;
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(*status)) {
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_SAFECLONE_ALLOCATED_WARNING;
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator = (UCollator *)stackBufferChars;
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding);
618b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    defaultReorderCodes = (int32_t*)((uint8_t*)rules + rulesSize);
619b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    reorderCodes = (int32_t*)((uint8_t*)defaultReorderCodes + defaultReorderCodesSize);
620b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    leadBytePermutationTable = (uint8_t*)reorderCodes + reorderCodesSize;
621b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode tempStatus = U_ZERO_ERROR;
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus);
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (coll->freeImageOnClose) {
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        image = (uint8_t *)uprv_malloc(imageSize);
628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Null pointer check
629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (image == NULL) {
630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucol_cloneBinary(coll, image, imageSize, status);
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        imageAllocated = TRUE;
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        image = (uint8_t *)coll->image;
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollator, status);
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(*status)) {
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (coll->rules) {
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (coll->freeRulesOnClose) {
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            localCollator->rules = u_strcpy(rules, coll->rules);
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //bufferEnd += rulesSize;
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            localCollator->rules = coll->rules;
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        localCollator->freeRulesOnClose = FALSE;
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        localCollator->rulesLength = coll->rulesLength;
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
655b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
656b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // collator reordering
657b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (coll->defaultReorderCodes) {
658b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        localCollator->defaultReorderCodes =
659b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            (int32_t*) uprv_memcpy(defaultReorderCodes, coll->defaultReorderCodes, coll->defaultReorderCodesLength * sizeof(int32_t));
660b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        localCollator->defaultReorderCodesLength = coll->defaultReorderCodesLength;
661b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        localCollator->freeDefaultReorderCodesOnClose = FALSE;
662b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
663b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (coll->reorderCodes) {
664b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        localCollator->reorderCodes =
665b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            (int32_t*)uprv_memcpy(reorderCodes, coll->reorderCodes, coll->reorderCodesLength * sizeof(int32_t));
666b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        localCollator->reorderCodesLength = coll->reorderCodesLength;
667b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        localCollator->freeReorderCodesOnClose = FALSE;
668b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
669b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (coll->leadBytePermutationTable) {
670b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        localCollator->leadBytePermutationTable =
671b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            (uint8_t*) uprv_memcpy(leadBytePermutationTable, coll->leadBytePermutationTable, 256);
672b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        localCollator->freeLeadBytePermutationTableOnClose = FALSE;
673b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(coll, (UColAttribute)i, status), status);
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // zero copies of pointers
680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    localCollator->actualLocale = NULL;
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator->validLocale = NULL;
682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    localCollator->requestedLocale = NULL;
683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    localCollator->ucaRules = coll->ucaRules; // There should only be one copy here.
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator->freeOnClose = colAllocated;
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    localCollator->freeImageOnClose = imageAllocated;
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return localCollator;
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_close(UCollator *coll)
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(coll != NULL) {
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // these are always owned by each UCollator struct,
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // so we always free them
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->validLocale != NULL) {
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll->validLocale);
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->actualLocale != NULL) {
701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_free(coll->actualLocale);
702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->requestedLocale != NULL) {
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll->requestedLocale);
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->latinOneCEs != NULL) {
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll->latinOneCEs);
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->options != NULL && coll->freeOptionsOnClose) {
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll->options);
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->rules != NULL && coll->freeRulesOnClose) {
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free((UChar *)coll->rules);
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->image != NULL && coll->freeImageOnClose) {
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free((UCATableHeader *)coll->image);
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
718b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
72027f654740f2a26ad62a5c155af9199af9e69b889claireho            uprv_free(coll->leadBytePermutationTable);
72127f654740f2a26ad62a5c155af9199af9e69b889claireho        }
722b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(coll->defaultReorderCodes != NULL && coll->freeDefaultReorderCodesOnClose == TRUE) {
723b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            uprv_free(coll->defaultReorderCodes);
724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) {
72627f654740f2a26ad62a5c155af9199af9e69b889claireho            uprv_free(coll->reorderCodes);
72727f654740f2a26ad62a5c155af9199af9e69b889claireho        }
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Here, it would be advisable to close: */
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* - UData for UCA (unless we stuff it in the root resb */
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Again, do we need additional housekeeping... HMMM! */
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose);
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->freeOnClose){
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* for safeClone, if freeOnClose is FALSE,
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            don't free the other instance data */
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(coll);
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_EXIT();
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This one is currently used by genrb & tests. After constructing from rules (tailoring),*/
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* you should be able to get the binary chunk to write out...  Doesn't look very full now */
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint8_t* U_EXPORT2
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status)
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t *result = NULL;
748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status)) {
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll->hasRealData == TRUE) {
752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *length = coll->image->size;
753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result = (uint8_t *)uprv_malloc(*length);
754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* test for NULL */
755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (result == NULL) {
756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(result, coll->image, *length);
760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result = (uint8_t *)uprv_malloc(*length);
763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* test for NULL */
764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (result == NULL) {
765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return NULL;
767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* build the UCATableHeader with minimal entries */
770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* do not copy the header from the UCA file because its values are wrong! */
771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* reset everything */
774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memset(result, 0, *length);
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* set the tailoring-specific values */
777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCATableHeader *myData = (UCATableHeader *)result;
778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->size = *length;
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* offset for the options, the only part of the data that is present after the header */
781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->options = sizeof(UCATableHeader);
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* need to always set the expansion value for an upper bound of the options */
784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->expansion = myData->options + sizeof(UColOptionSet);
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->magic = UCOL_HEADER_MAGIC;
787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->isBigEndian = U_IS_BIG_ENDIAN;
788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->charSetFamily = U_CHARSET_FAMILY;
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* copy UCA's version; genrb will override all but the builder version with tailoring data */
791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        myData->jamoSpecial = coll->image->jamoSpecial;
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* copy the collator options */
799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(result+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return result;
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) {
805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status)) {
806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->caseFirst = (UColAttributeValue)opts->caseFirst;
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->caseLevel = (UColAttributeValue)opts->caseLevel;
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->frenchCollation = (UColAttributeValue)opts->frenchCollation;
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->normalizationMode = (UColAttributeValue)opts->normalizationMode;
81227f654740f2a26ad62a5c155af9199af9e69b889claireho    if(result->normalizationMode == UCOL_ON && !initializeFCD(status)) {
81327f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
81427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->strength = (UColAttributeValue)opts->strength;
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->variableTopValue = opts->variableTopValue;
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->alternateHandling = (UColAttributeValue)opts->alternateHandling;
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->numericCollation = (UColAttributeValue)opts->numericCollation;
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->caseFirstisDefault = TRUE;
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->caseLevelisDefault = TRUE;
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->frenchCollationisDefault = TRUE;
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->normalizationModeisDefault = TRUE;
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->strengthisDefault = TRUE;
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->variableTopValueisDefault = TRUE;
82627f654740f2a26ad62a5c155af9199af9e69b889claireho    result->alternateHandlingisDefault = TRUE;
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->hiraganaQisDefault = TRUE;
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->numericCollationisDefault = TRUE;
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucol_updateInternalState(result, status);
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->options = opts;
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Approximate determination if a character is at a contraction end.
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Guaranteed to be TRUE if a character is at the end of a contraction,
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* otherwise it is not deterministic.
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param c character to be determined
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll collator
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c < coll->minContrEndCP) {
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t  hash = c;
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t  htbyte;
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (U16_IS_TRAIL(c)) {
853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return TRUE;
854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    htbyte = coll->contrEndCP[hash>>3];
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (((htbyte >> (hash & 7)) & 1) == 1);
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   i_getCombiningClass()
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*        A fast, at least partly inline version of u_getCombiningClass()
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*        This is a candidate for further optimization.  Used heavily
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*        in contraction processing.
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) {
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t sCC = 0;
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) {
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sCC = u_getCombiningClass(c);
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return sCC;
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status) {
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar c;
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollator *result = fillIn;
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status) || image == NULL) {
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(result == NULL) {
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = (UCollator *)uprv_malloc(sizeof(UCollator));
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(result == NULL) {
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return result;
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result->freeOnClose = TRUE;
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result->freeOnClose = FALSE;
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->image = image;
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->mapping.getFoldingOffset = _getFoldingOffset;
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosition;
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status)) {
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(result->freeOnClose == TRUE) {
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(result);
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = NULL;
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return result;
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping);
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->contractionCEs);
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex);
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion);
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->rules = NULL;
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->rulesLength = 0;
914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->freeRulesOnClose = FALSE;
915b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    result->defaultReorderCodes = NULL;
916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    result->defaultReorderCodesLength = 0;
917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    result->freeDefaultReorderCodesOnClose = FALSE;
91827f654740f2a26ad62a5c155af9199af9e69b889claireho    result->reorderCodes = NULL;
91927f654740f2a26ad62a5c155af9199af9e69b889claireho    result->reorderCodesLength = 0;
920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    result->freeReorderCodesOnClose = FALSE;
92127f654740f2a26ad62a5c155af9199af9e69b889claireho    result->leadBytePermutationTable = NULL;
922b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    result->freeLeadBytePermutationTableOnClose = FALSE;
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* get the version info from UCATableHeader and populate the Collator struct*/
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->dataVersion[2] = 0;
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->dataVersion[3] = 0;
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->minUnsafeCP = 0;
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (c=0; c<0x300; c++) {  // Find the smallest unsafe char.
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ucol_unsafeCP(c, result)) break;
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->minUnsafeCP = c;
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP;
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->minContrEndCP = 0;
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (c=0; c<0x300; c++) {  // Find the Contraction-ending char.
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ucol_contractionEndCP(c, result)) break;
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->minContrEndCP = c;
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* max expansion tables */
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->endExpansionCE = (uint32_t*)((uint8_t*)result->image +
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         result->image->endExpansionCE);
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->lastEndExpansionCE = result->endExpansionCE +
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 result->image->endExpansionCECount - 1;
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->expansionCESize = (uint8_t*)result->image +
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               result->image->expansionCESize;
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //result->errorCode = *status;
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->latinOneCEs = NULL;
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->latinOneRegenTable = FALSE;
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->latinOneFailed = FALSE;
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->UCA = UCA;
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Normally these will be set correctly later. This is the default if you use UCA or the default. */
962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->ucaRules = NULL;
963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    result->actualLocale = NULL;
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->validLocale = NULL;
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->requestedLocale = NULL;
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->hasRealData = FALSE; // real data lives in .dat file...
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result->freeImageOnClose = FALSE;
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
96927f654740f2a26ad62a5c155af9199af9e69b889claireho    /* set attributes */
97027f654740f2a26ad62a5c155af9199af9e69b889claireho    ucol_setOptionsFromHeader(
97127f654740f2a26ad62a5c155af9199af9e69b889claireho        result,
97227f654740f2a26ad62a5c155af9199af9e69b889claireho        (UColOptionSet*)((uint8_t*)result->image+result->image->options),
97327f654740f2a26ad62a5c155af9199af9e69b889claireho        status);
97427f654740f2a26ad62a5c155af9199af9e69b889claireho    result->freeOptionsOnClose = FALSE;
97527f654740f2a26ad62a5c155af9199af9e69b889claireho
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* new Mark's code */
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For generation of Implicit CEs
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @author Davis
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Cleaned up so that changes can be made more easily.
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Old values:
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# First Implicit: E26A792D
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Last Implicit: E3DC70C0
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# First CJK: E0030300
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Last CJK: E0A9DD00
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# First CJK_A: E0A9DF00
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Last CJK_A: E0DE3100
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
/* Following is a port of Mark's code for the new treatment of implicits.
 * It is positioned here, since ucol_initUCA needs to initialize the
 * variables below according to the data in the fractional UCA.
 */
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Function used to:
1001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
1002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * b) bump any non-CJK characters by 10FFFF.
1003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The relevant blocks are:
1004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * A:    4E00..9FFF; CJK Unified Ideographs
1005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *       F900..FAFF; CJK Compatibility Ideographs
1006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * B:    3400..4DBF; CJK Unified Ideographs Extension A
1007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
1008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * As long as
1009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *   no new B characters are allocated between 4E00 and FAFF, and
1010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *   no new A characters are outside of this range,
1011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * (very high probability) this simple code will work.
1012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The reordered blocks are:
1013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Block1 is CJK
1014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Block2 is CJK_COMPAT_USED
1015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Block3 is CJK_A
1016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * (all contiguous)
1017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Any other CJK gets its normal code point
 * Any non-CJK gets +110000
1019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * When we reorder Block1, we make sure that it is at the very start,
1020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * so that it will use a 3-byte form.
 * Warning: we only pick up the compatibility characters that are
 * NOT decomposed, so that block is smaller!
1023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// CONSTANTS
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    NON_CJK_OFFSET = 0x110000,
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Parameters of the implicit-weight byte layout.
 * All of them start out as 0 and are filled in by initImplicitConstants().
 */
// Step between consecutive final-byte values (gap + 1) for 3- and 4-byte forms.
static int32_t final3Multiplier = 0;
static int32_t final4Multiplier = 0;
// Number of distinct final-byte values used by 3- and 4-byte forms.
static int32_t final3Count = 0;
static int32_t final4Count = 0;
// Number of distinct values a middle byte can take.
static int32_t medialCount = 0;
// Lead-byte boundaries: first 3-byte lead, first 4-byte lead, last 4-byte lead.
static int32_t min3Primary = 0;
static int32_t min4Primary = 0;
static int32_t max4Primary = 0;
// Allowed range for trailing bytes, and the largest final byte per form.
static int32_t minTrail = 0;
static int32_t maxTrail = 0;
static int32_t max3Trail = 0;
static int32_t max4Trail = 0;
// First raw value that is encoded with the 4-byte form.
static int32_t min4Boundary = 0;
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32
104927f654740f2a26ad62a5c155af9199af9e69b889claireho    // 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
105027f654740f2a26ad62a5c155af9199af9e69b889claireho    // 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_BASE = 0x4E00,
105227f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_LIMIT = 0x9FCB+1,
105327f654740f2a26ad62a5c155af9199af9e69b889claireho    // Unified CJK ideographs in the compatibility ideographs block.
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_COMPAT_USED_BASE = 0xFA0E,
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
105627f654740f2a26ad62a5c155af9199af9e69b889claireho    // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
105727f654740f2a26ad62a5c155af9199af9e69b889claireho    // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_A_BASE = 0x3400,
105927f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_A_LIMIT = 0x4DB5+1,
106027f654740f2a26ad62a5c155af9199af9e69b889claireho    // 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
106127f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CJK_B_BASE = 0x20000,
106327f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_B_LIMIT = 0x2A6D6+1,
106427f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;
106527f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;
106627f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_C_BASE = 0x2A700,
106727f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_C_LIMIT = 0x2B734+1,
106827f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;
106927f654740f2a26ad62a5c155af9199af9e69b889claireho    // 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;
107027f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_D_BASE = 0x2B740,
107127f654740f2a26ad62a5c155af9199af9e69b889claireho    CJK_D_LIMIT = 0x2B81D+1;
107227f654740f2a26ad62a5c155af9199af9e69b889claireho    // when adding to this list, look for all occurrences (in project)
107327f654740f2a26ad62a5c155af9199af9e69b889claireho    // of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing!!!!
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 swapCJK(UChar32 i) {
107627f654740f2a26ad62a5c155af9199af9e69b889claireho    if (i < CJK_A_BASE) {
107727f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
107827f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_A_LIMIT) {
107927f654740f2a26ad62a5c155af9199af9e69b889claireho        // Extension A has lower code points than the original Unihan+compat
108027f654740f2a26ad62a5c155af9199af9e69b889claireho        // but sorts higher.
108127f654740f2a26ad62a5c155af9199af9e69b889claireho        return i - CJK_A_BASE
108227f654740f2a26ad62a5c155af9199af9e69b889claireho                + (CJK_LIMIT - CJK_BASE)
108327f654740f2a26ad62a5c155af9199af9e69b889claireho                + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
108427f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_BASE) {
108527f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
108627f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_LIMIT) {
108727f654740f2a26ad62a5c155af9199af9e69b889claireho        return i - CJK_BASE;
108827f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_COMPAT_USED_BASE) {
108927f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
109027f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_COMPAT_USED_LIMIT) {
109127f654740f2a26ad62a5c155af9199af9e69b889claireho        return i - CJK_COMPAT_USED_BASE
109227f654740f2a26ad62a5c155af9199af9e69b889claireho                + (CJK_LIMIT - CJK_BASE);
109327f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_B_BASE) {
109427f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
109527f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_B_LIMIT) {
109627f654740f2a26ad62a5c155af9199af9e69b889claireho        return i; // non-BMP-CJK
109727f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_C_BASE) {
109827f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
109927f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_C_LIMIT) {
110027f654740f2a26ad62a5c155af9199af9e69b889claireho        return i; // non-BMP-CJK
110127f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_D_BASE) {
110227f654740f2a26ad62a5c155af9199af9e69b889claireho        // non-CJK
110327f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (i < CJK_D_LIMIT) {
110427f654740f2a26ad62a5c155af9199af9e69b889claireho        return i; // non-BMP-CJK
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return i + NON_CJK_OFFSET; // non-CJK
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getRawFromCodePoint(UChar32 i) {
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return swapCJK(i)+1;
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getCodePointFromRaw(UChar32 i) {
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i--;
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 result = 0;
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(i >= NON_CJK_OFFSET) {
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = i - NON_CJK_OFFSET;
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(i >= CJK_B_BASE) {
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = i;
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(i < CJK_LIMIT - CJK_BASE) {
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = i + CJK_BASE;
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = -1;
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// GET IMPLICIT PRIMARY WEIGHTS
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Return value is left justified primary key
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getImplicitFromRaw(UChar32 cp) {
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0 || cp > UCOL_MAX_INPUT) {
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t last0 = cp - min4Boundary;
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (last0 < 0) {
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last1 = cp / final3Count;
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last0 = cp % final3Count;
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last2 = last1 / medialCount;
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last1 %= medialCount;
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last1 = minTrail + last1; // offset
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last2 = min3Primary + last2; // offset
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (last2 >= min4Primary) {
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2));
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (last2 << 24) + (last1 << 16) + (last0 << 8);
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last1 = last0 / final4Count;
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last0 %= final4Count;
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last2 = last1 / medialCount;
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last1 %= medialCount;
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t last3 = last2 / medialCount;
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last2 %= medialCount;
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last1 = minTrail + last1; // offset
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last2 = minTrail + last2; // offset
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last3 = min4Primary + last3; // offset
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (last3 > max4Primary) {
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3));
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t U_EXPORT2
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getImplicitPrimary(UChar32 cp) {
118727f654740f2a26ad62a5c155af9199af9e69b889claireho   //fprintf(stdout, "Incoming: %04x\n", cp);
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    cp = swapCJK(cp);
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    cp++;
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // we now have a range of numbers from 0 to 21FFFF.
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
119527f654740f2a26ad62a5c155af9199af9e69b889claireho    //fprintf(stdout, "CJK swapped: %04x\n", cp);
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return uprv_uca_getImplicitFromRaw(cp);
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Converts implicit CE into raw integer ("code point")
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param implicit
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return -1 if illegal format
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_getRawFromImplicit(uint32_t implicit) {
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 result;
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 b3 = implicit & 0xFF;
1209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 b2 = (implicit >> 8) & 0xFF;
1210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 b1 = (implicit >> 16) & 0xFF;
1211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 b0 = (implicit >> 24) & 0xFF;
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // simple parameter checks
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (b0 < min3Primary || b0 > max4Primary
1215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || b1 < minTrail || b1 > maxTrail)
1216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return -1;
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // normal offsets
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    b1 -= minTrail;
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // take care of the final values, and compose
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (b0 < min4Primary) {
1222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (b2 < minTrail || b2 > max3Trail || b3 != 0)
1223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return -1;
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b2 -= minTrail;
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 remainder = b2 % final3Multiplier;
1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (remainder != 0)
1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return -1;
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b0 -= min3Primary;
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b2 /= final3Multiplier;
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = ((b0 * medialCount) + b1) * final3Count + b2;
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (b2 < minTrail || b2 > maxTrail
1233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            || b3 < minTrail || b3 > max4Trail)
1234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return -1;
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b2 -= minTrail;
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b3 -= minTrail;
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 remainder = b3 % final4Multiplier;
1238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (remainder != 0)
1239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return -1;
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b3 /= final4Multiplier;
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b0 -= min4Primary;
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // final check
1245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (result < 0 || result > UCOL_MAX_INPUT)
1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return -1;
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Computes 1 + (a-1)/b using truncating integer division.
 * For a >= 1 and b >= 1 this is a/b rounded up.
 */
static inline int32_t divideAndRoundUp(int a, int b) {
    const int fullStepsBelow = (a - 1) / b;
    return fullStepsBelow + 1;
}
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this function is either called from initUCA or from genUCA before
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * doing canonical closure for the UCA.
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set up to generate implicits.
1261b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Maintenance Note:  this function may end up being called more than once, due
1262b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                    to threading races during initialization.  Make sure that
1263b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                    none of the Constants is ever transiently assigned an
1264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                    incorrect value.
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param minPrimary
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param maxPrimary
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param minTrail final byte
1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param maxTrail final byte
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param gap3 the gap we leave for tailoring for 3-byte forms
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param gap4 the gap we leave for tailoring for 4-byte forms
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void initImplicitConstants(int minPrimary, int maxPrimary,
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    int minTrailIn, int maxTrailIn,
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    int gap3, int primaries3count,
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    UErrorCode *status) {
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // some simple parameter checks
1277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ((minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF)
1278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF)
1279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || (primaries3count < 1))
1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    minTrail = minTrailIn;
1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    maxTrail = maxTrailIn;
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    min3Primary = minPrimary;
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    max4Primary = maxPrimary;
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // compute constants for use later.
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // number of values we can use in trailing bytes
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // leave room for empty values between AND above, e.g. if gap = 2
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // range 3..7 => +3 -4 -5 -6 -7: so 1 value
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    final3Multiplier = gap3 + 1;
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // medials can use full range
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    medialCount = (maxTrail - minTrail + 1);
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // find out how many values fit in each form
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t threeByteCount = medialCount * final3Count;
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // now determine where the 3/4 boundary is.
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // we use 3 bytes below the boundary, and 4 above
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t primariesAvailable = maxPrimary - minPrimary + 1;
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t primaries4count = primariesAvailable - primaries3count;
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t min3ByteCoverage = primaries3count * threeByteCount;
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    min4Primary = minPrimary + primaries3count;
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    min4Boundary = min3ByteCoverage;
1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Now expand out the multiplier for the 4 bytes, and redo.
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary;
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (gap4 < 1) {
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    final4Multiplier = gap4 + 1;
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    final4Count = neededPerFinalByte;
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Supply parameters for generating implicit CEs
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
1332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruuprv_uca_initImplicitConstants(UErrorCode *status) {
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
1335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status);
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*    collIterNormalize     Incremental Normalization happens here.                       */
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                          pick up the range of chars identifed by FCD,                  */
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                          normalize it into the collIterate's writable buffer,          */
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                          switch the collIterate's state to use the writable buffer.    */
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                                        */
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid collIterNormalize(collIterate *collationSource)
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status = U_ZERO_ERROR;
134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *srcP = collationSource->pos - 1;      /*  Start of chars to normalize    */
134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *endP = collationSource->fcdPosition;  /* End of region to normalize+1    */
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - srcP)),
135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    collationSource->writableBuffer,
135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    status);
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG
135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_errorName(status));
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    collationSource->pos        = collationSource->writableBuffer.getTerminatedBuffer();
1362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->origFlags  = collationSource->flags;
1363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->flags     |= UCOL_ITER_INNORMBUF;
1364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This function takes the iterator and extracts normalized stuff up to the next boundary
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// It is similar in the end results to the collIterNormalize, but for the cases when we
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// use an iterator
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizeIterator(collIterate *collationSource) {
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool wasNormalized = FALSE;
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->iterator, UITER_CURRENT);
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t iterIndex = collationSource->iterator->getState(collationSource->iterator);
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(status == U_BUFFER_OVERFLOW_ERROR || normLen == (int32_t)collationSource->writableBufSize) {
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // reallocate and terminate
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!u_growBufferFromStatic(collationSource->stackWritableBuffer,
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               &collationSource->writableBuffer,
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               (int32_t *)&collationSource->writableBufSize, normLen + 1,
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               0)
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #ifdef UCOL_DEBUG
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "normalizeIterator(), out of memory\n");
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #endif
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //collationSource->iterator->move(collationSource->iterator, iterIndex, UITER_ZERO);
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collationSource->iterator->setState(collationSource->iterator, iterIndex, &status);
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Terminate the buffer - we already checked that it is big enough
1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->writableBuffer[normLen] = 0;
1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {
1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      collationSource->flags |= UCOL_ITER_ALLOCATED;
1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->pos        = collationSource->writableBuffer;
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->origFlags  = collationSource->flags;
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->flags     |= UCOL_ITER_INNORMBUF;
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  collationSource->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Incremental FCD check and normalize                                                    */
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*   Called from getNextCE when normalization state is suspect.                           */
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*   When entering, the state is known to be this:                                        */
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*      o   We are working in the main buffer of the collIterate, not the side            */
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          writable buffer.  When in the side buffer, normalization mode is always off,  */
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          so we won't get here.                                                         */
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*      o   The leading combining class from the current character is 0 or                */
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          the trailing combining class of the previous char was zero.                   */
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          True because the previous call to this function will have always exited       */
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*          that way, and we get called for every char where cc might be non-zero.        */
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collIterFCD(collIterate *collationSource) {
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *srcP, *endP;
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t     leadingCC;
1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t     prevTrailingCC = 0;
1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t    fcd;
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool       needNormalize = FALSE;
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    srcP = collationSource->pos-1;
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (collationSource->flags & UCOL_ITER_HASLEN) {
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        endP = collationSource->endp;
1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        endP = NULL;
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Get the trailing combining class of the current character.  If it's zero,
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   we are OK.
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* trie access */
1438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);
1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fcd != 0) {
1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (prevTrailingCC != 0) {
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // The current char has a non-zero trailing CC.  Scan forward until we find
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   a char with a leading cc of zero.
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while (endP == NULL || srcP != endP)
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UChar *savedSrcP = srcP;
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* trie access */
1450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (leadingCC == 0) {
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    srcP = savedSrcP;      // Hit char that is not part of combining sequence.
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           //   back up over it.  (Could be surrogate pair!)
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (leadingCC < prevTrailingCC) {
1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    needNormalize = TRUE;
1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collationSource->fcdPosition = (UChar *)srcP;
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return needNormalize;
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the CE retrieval functions                                 */
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                          */
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t getImplicit(UChar32 cp, collIterate *collationSource);
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource);
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* There should be a macro version of this function in the header file.   */
/* This is the first function that tries to fetch a collation element.    */
/* If it is not successful, or it encounters a more difficult situation,  */
/* some more sophisticated and slower functions are invoked.              */
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t order = 0;
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (collationSource->CEpos > collationSource->toReturn) {       /* Are there any CEs from previous expansions? */
1488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        order = *(collationSource->toReturn++);                         /* if so, return them */
1489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(collationSource->CEpos == collationSource->toReturn) {
1490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            collationSource->CEpos = collationSource->toReturn = collationSource->extendCEs ? collationSource->extendCEs : collationSource->CEs;
1491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return order;
1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar ch = 0;
1496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->offsetReturn = NULL;
1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
1499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        for (;;)                           /* Loop handles case when incremental normalize switches   */
1500b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        {                                  /*   to or from the side buffer / original string, and we  */
1501b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /*   need to start again to get the next character.        */
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1503b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
1504b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            {
1505b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // The source string is null terminated and we're not working from the side buffer,
1506b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                //   and we're not normalizing.  This is the fast path.
1507b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                //   (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)
1508b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ch = *collationSource->pos++;
1509b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (ch != 0) {
1510b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    break;
1511b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
1512b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                else {
1513b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return UCOL_NO_MORE_CES;
1514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1517b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (collationSource->flags & UCOL_ITER_HASLEN) {
1518b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Normal path for strings when length is specified.
1519b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                //   (We can't be in side buffer because it is always null terminated.)
1520b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (collationSource->pos >= collationSource->endp) {
1521b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // Ran off of the end of the main source string.  We're done.
1522b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return UCOL_NO_MORE_CES;
1523b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
1524b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ch = *collationSource->pos++;
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1526b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            else if(collationSource->flags & UCOL_USE_ITERATOR) {
1527b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
1528b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(iterCh == U_SENTINEL) {
1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return UCOL_NO_MORE_CES;
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1531b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ch = (UChar)iterCh;
1532b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
1533b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            else
1534b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            {
1535b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Null terminated string.
1536b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ch = *collationSource->pos++;
1537b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (ch == 0) {
1538b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // Ran off end of buffer.
1539b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
1540b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        // Ran off end of main string. backing up one character.
1541b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        collationSource->pos--;
1542b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        return UCOL_NO_MORE_CES;
1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1544b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    else
1545b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    {
1546b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        // Hit null in the normalize side buffer.
1547b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        // Usually this means the end of the normalized data,
1548b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        // except for one odd case: a null followed by combining chars,
1549b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        //   which is the case if we are at the start of the buffer.
1550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {
1551b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            break;
1552b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1554b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        //  Null marked end of side buffer.
1555b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        //   Revert to the main string and
1556b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        //   loop back to top to try again to get a character.
1557b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        collationSource->pos   = collationSource->fcdPosition;
1558b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        collationSource->flags = collationSource->origFlags;
1559b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        continue;
1560b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1564b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(collationSource->flags&UCOL_HIRAGANA_Q) {
1565b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
1566b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                 * based on whether the previous codepoint was Hiragana or Katakana.
1567b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                 */
1568b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||
1569b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
1570b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    collationSource->flags |= UCOL_WAS_HIRAGANA;
1571b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
1572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    collationSource->flags &= ~UCOL_WAS_HIRAGANA;
1573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
1574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // We've got a character.  See if there's any fcd and/or normalization stuff to do.
1577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            //    Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.
1578b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
1579b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
1580b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1582b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (collationSource->fcdPosition >= collationSource->pos) {
1583b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // An earlier FCD check has already covered the current character.
1584b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // We can go ahead and process this char.
1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1588b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (ch < ZERO_CC_LIMIT_ ) {
1589b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Fast fcd safe path.  Trailing combining class == 0.  This char is OK.
1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1593b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
1594b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // We need to peek at the next character in order to tell if we are FCD
1595b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
1596b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // We are at the last char of source string.
1597b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    //  It is always OK for FCD check.
1598b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    break;
1599b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
1600b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1601b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Not at last char of source string (or we'll check against terminating null).  Do the FCD fast test
1602b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
1603b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    break;
1604b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
1605b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1608b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Need a more complete FCD check and possible normalization.
1609b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (collIterFCD(collationSource)) {
1610b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                collIterNormalize(collationSource);
1611b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
1612b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
1613b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                //  No normalization was needed.  Go ahead and process the char we already had.
1614b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
1615b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
1616b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1617b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Some normalization happened.  Next loop iteration will pick up a char
1618b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            //   from the normalization buffer.
1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1620b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }   // end for (;;)
1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1623b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (ch <= 0xFF) {
1624b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /*  For latin-1 characters we never need to fall back to the UCA table        */
1625b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /*    because all of the UCA data is replicated in the latinOneMapping array  */
1626b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            order = coll->latinOneMapping[ch];
1627b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (order > UCOL_NOT_FOUND) {
1628b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
1629b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
1630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1631b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        else
1632b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        {
1633b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Always use UCA for Han, Hangul
1634b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // (Han extension A is before main Han block)
1635b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // **** Han compatibility chars ?? ****
1636b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
1637b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
1638b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
1639b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // between the two target ranges; do normal lookup
1640b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // **** this range is YI, Modifier tone letters, ****
1641b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
1642b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // **** Latin-D might be tailored, so we need to ****
1643b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // **** do the normal lookup for these guys.     ****
1644b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
1645b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
1646b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // in one of the target ranges; use UCA
1647b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    order = UCOL_NOT_FOUND;
1648b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
1649b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else {
1650b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
1651b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
1652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1653b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
1654b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);    /* and try to get the special CE     */
1655b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
1656b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1657b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a good CE in the tailoring */
1658b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
1659b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1661b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
1662b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
1663b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1666b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
1667b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(order == UCOL_NOT_FOUND) {
1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        order = getImplicit(ch, collationSource);
1670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return order; /* return the CE */
1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_getNextCE, out-of-line version for use from other files.   */
1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t  U_EXPORT2
1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ucol_IGetNextCE(coll, collationSource, status);
1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Incremental previous normalization happens here. Pick up the range of chars
1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* identifed by FCD, normalize it into the collIterate's writable buffer,
1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* switch the collIterate's state to use the writable buffer.
1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data
1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid collPrevIterNormalize(collIterate *data)
1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status  = U_ZERO_ERROR;
169150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pEnd   = data->pos;  /* End normalize + 1 */
169250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pStart;
1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Start normalize */
1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->fcdPosition == NULL) {
1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pStart = data->string;
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pStart = data->fcdPosition + 1;
1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t normLen =
170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pStart) + 1)),
170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             data->writableBuffer,
170550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             status).
170650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length();
170750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(status)) {
170850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    this puts the null termination infront of the normalized string instead
1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    of the end
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
171450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->writableBuffer.insert(0, (UChar)0);
1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /*
1717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * The usual case at this point is that we've got a base
1718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * character followed by marks that were normalized. If
1719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * fcdPosition is NULL, that means that we backed up to
1720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * the beginning of the string and there's no base character.
1721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *
1722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * Forward processing will usually normalize when it sees
1723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * the first mark, so that mark will get it's natural offset
1724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * and the rest will get the offset of the character following
1725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * the marks. The base character will also get its natural offset.
1726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *
1727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * We write the offset of the base character, if there is one,
1728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * followed by the offset of the first mark and then the offsets
1729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * of the rest of the marks.
1730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     */
1731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t firstMarkOffset = 0;
173250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t trailOffset     = (int32_t)(data->pos - data->string + 1);
1733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t trailCount      = normLen - 1;
1734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (data->fcdPosition != NULL) {
173650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t baseOffset = (int32_t)(data->fcdPosition - data->string);
1737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UChar   baseChar   = *data->fcdPosition;
1738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        firstMarkOffset = baseOffset + 1;
1740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*
174250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * If the base character is the start of a contraction, forward processing
174350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * will normalize the marks while checking for the contraction, which means
174450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * that the offset of the first mark will the same as the other marks.
174550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
174650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * **** THIS IS PROBABLY NOT A COMPLETE TEST ****
174750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
174850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (baseChar >= 0x100) {
174950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, baseChar);
175050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
175150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) {
175250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, baseChar);
175350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
175450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
175550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION_TAG) {
175650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                firstMarkOffset = trailOffset;
175750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
175850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
176027f654740f2a26ad62a5c155af9199af9e69b889claireho        data->appendOffset(baseOffset, status);
1761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
176327f654740f2a26ad62a5c155af9199af9e69b889claireho    data->appendOffset(firstMarkOffset, status);
1764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for (int32_t i = 0; i < trailCount; i += 1) {
176627f654740f2a26ad62a5c155af9199af9e69b889claireho        data->appendOffset(trailOffset, status);
1767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    data->offsetRepeatValue = trailOffset;
1770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    data->offsetReturn = data->offsetStore - 1;
1772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (data->offsetReturn == data->offsetBuffer) {
1773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->offsetStore = data->offsetBuffer;
1774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
177650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->pos        = data->writableBuffer.getTerminatedBuffer() + 1 + normLen;
1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->origFlags  = data->flags;
1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     |= UCOL_ITER_INNORMBUF;
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Incremental FCD check for previous iteration and normalize. Called from
1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* getPrevCE when normalization state is suspect.
1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* When entering, the state is known to be this:
1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* o  We are working in the main buffer of the collIterate, not the side
1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    writable buffer. When in the side buffer, normalization mode is always
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    off, so we won't get here.
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* o  The leading combining class from the current character is 0 or the
1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    trailing combining class of the previous char was zero.
1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    True because the previous call to this function will have always exited
1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*    that way, and we get called for every char where cc might be non-zero.
1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterate struct
1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return normalization status, TRUE for normalization to be done, FALSE
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*         otherwise
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool collPrevIterFCD(collIterate *data)
1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *src, *start;
1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t     leadingCC;
1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t     trailingCC = 0;
1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t    fcd;
1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool       result = FALSE;
1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    start = data->string;
1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    src = data->pos + 1;
1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Get the trailing combining class of the current character. */
1811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);
1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (leadingCC != 0) {
1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        The current char has a non-zero leading combining class.
1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        Scan backward until we find a char with a trailing cc of zero.
1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (;;)
1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (start == src) {
1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->fcdPosition = NULL;
1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return result;
1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1827b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);
1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (trailingCC == 0) {
1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (leadingCC < trailingCC) {
1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                result = TRUE;
1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->fcdPosition = (UChar *)src;
1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
184827f654740f2a26ad62a5c155af9199af9e69b889claireho/** gets a code unit from the string at a given offset
1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  Handles both normal and iterative cases.
1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  No error checking - caller beware!
1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
185227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline
185327f654740f2a26ad62a5c155af9199af9e69b889clairehoUChar peekCodeUnit(collIterate *source, int32_t offset) {
1854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(source->pos != NULL) {
1855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return *(source->pos + offset);
1856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else if(source->iterator != NULL) {
185727f654740f2a26ad62a5c155af9199af9e69b889claireho        UChar32 c;
1858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(offset != 0) {
1859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            source->iterator->move(source->iterator, offset, UITER_CURRENT);
186027f654740f2a26ad62a5c155af9199af9e69b889claireho            c = source->iterator->next(source->iterator);
1861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            source->iterator->move(source->iterator, -offset-1, UITER_CURRENT);
1862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
186327f654740f2a26ad62a5c155af9199af9e69b889claireho            c = source->iterator->current(source->iterator);
1864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
186527f654740f2a26ad62a5c155af9199af9e69b889claireho        return c >= 0 ? (UChar)c : 0xfffd;  // If the caller works properly, we should never see c<0.
1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
186727f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0xfffd;
1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
187127f654740f2a26ad62a5c155af9199af9e69b889claireho// Code point version. Treats the offset as a _code point_ delta.
187227f654740f2a26ad62a5c155af9199af9e69b889claireho// We cannot use U16_FWD_1_UNSAFE and similar because we might not have well-formed UTF-16.
187327f654740f2a26ad62a5c155af9199af9e69b889claireho// We cannot use U16_FWD_1 and similar because we do not know the start and limit of the buffer.
187427f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline
187527f654740f2a26ad62a5c155af9199af9e69b889clairehoUChar32 peekCodePoint(collIterate *source, int32_t offset) {
187627f654740f2a26ad62a5c155af9199af9e69b889claireho    UChar32 c;
187727f654740f2a26ad62a5c155af9199af9e69b889claireho    if(source->pos != NULL) {
187827f654740f2a26ad62a5c155af9199af9e69b889claireho        const UChar *p = source->pos;
187927f654740f2a26ad62a5c155af9199af9e69b889claireho        if(offset >= 0) {
188027f654740f2a26ad62a5c155af9199af9e69b889claireho            // Skip forward over (offset-1) code points.
188127f654740f2a26ad62a5c155af9199af9e69b889claireho            while(--offset >= 0) {
188227f654740f2a26ad62a5c155af9199af9e69b889claireho                if(U16_IS_LEAD(*p++) && U16_IS_TRAIL(*p)) {
188327f654740f2a26ad62a5c155af9199af9e69b889claireho                    ++p;
188427f654740f2a26ad62a5c155af9199af9e69b889claireho                }
188527f654740f2a26ad62a5c155af9199af9e69b889claireho            }
188627f654740f2a26ad62a5c155af9199af9e69b889claireho            // Read the code point there.
188727f654740f2a26ad62a5c155af9199af9e69b889claireho            c = *p++;
188827f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar trail;
188927f654740f2a26ad62a5c155af9199af9e69b889claireho            if(U16_IS_LEAD(c) && U16_IS_TRAIL(trail = *p)) {
189027f654740f2a26ad62a5c155af9199af9e69b889claireho                c = U16_GET_SUPPLEMENTARY(c, trail);
189127f654740f2a26ad62a5c155af9199af9e69b889claireho            }
189227f654740f2a26ad62a5c155af9199af9e69b889claireho        } else /* offset<0 */ {
189327f654740f2a26ad62a5c155af9199af9e69b889claireho            // Skip backward over (offset-1) code points.
189427f654740f2a26ad62a5c155af9199af9e69b889claireho            while(++offset < 0) {
189527f654740f2a26ad62a5c155af9199af9e69b889claireho                if(U16_IS_TRAIL(*--p) && U16_IS_LEAD(*(p - 1))) {
189627f654740f2a26ad62a5c155af9199af9e69b889claireho                    --p;
189727f654740f2a26ad62a5c155af9199af9e69b889claireho                }
189827f654740f2a26ad62a5c155af9199af9e69b889claireho            }
189927f654740f2a26ad62a5c155af9199af9e69b889claireho            // Read the code point before that.
190027f654740f2a26ad62a5c155af9199af9e69b889claireho            c = *--p;
190127f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar lead;
190227f654740f2a26ad62a5c155af9199af9e69b889claireho            if(U16_IS_TRAIL(c) && U16_IS_LEAD(lead = *(p - 1))) {
190327f654740f2a26ad62a5c155af9199af9e69b889claireho                c = U16_GET_SUPPLEMENTARY(lead, c);
190427f654740f2a26ad62a5c155af9199af9e69b889claireho            }
190527f654740f2a26ad62a5c155af9199af9e69b889claireho        }
190627f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if(source->iterator != NULL) {
190727f654740f2a26ad62a5c155af9199af9e69b889claireho        if(offset >= 0) {
190827f654740f2a26ad62a5c155af9199af9e69b889claireho            // Skip forward over (offset-1) code points.
190927f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t fwd = offset;
191027f654740f2a26ad62a5c155af9199af9e69b889claireho            while(fwd-- > 0) {
191127f654740f2a26ad62a5c155af9199af9e69b889claireho                uiter_next32(source->iterator);
191227f654740f2a26ad62a5c155af9199af9e69b889claireho            }
191327f654740f2a26ad62a5c155af9199af9e69b889claireho            // Read the code point there.
191427f654740f2a26ad62a5c155af9199af9e69b889claireho            c = uiter_current32(source->iterator);
191527f654740f2a26ad62a5c155af9199af9e69b889claireho            // Return to the starting point, skipping backward over (offset-1) code points.
191627f654740f2a26ad62a5c155af9199af9e69b889claireho            while(offset-- > 0) {
191727f654740f2a26ad62a5c155af9199af9e69b889claireho                uiter_previous32(source->iterator);
191827f654740f2a26ad62a5c155af9199af9e69b889claireho            }
191927f654740f2a26ad62a5c155af9199af9e69b889claireho        } else /* offset<0 */ {
192027f654740f2a26ad62a5c155af9199af9e69b889claireho            // Read backward, reading offset code points, remember only the last-read one.
192127f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t back = offset;
192227f654740f2a26ad62a5c155af9199af9e69b889claireho            do {
192327f654740f2a26ad62a5c155af9199af9e69b889claireho                c = uiter_previous32(source->iterator);
192427f654740f2a26ad62a5c155af9199af9e69b889claireho            } while(++back < 0);
192527f654740f2a26ad62a5c155af9199af9e69b889claireho            // Return to the starting position, skipping forward over offset code points.
192627f654740f2a26ad62a5c155af9199af9e69b889claireho            do {
192727f654740f2a26ad62a5c155af9199af9e69b889claireho                uiter_next32(source->iterator);
192827f654740f2a26ad62a5c155af9199af9e69b889claireho            } while(++offset < 0);
192927f654740f2a26ad62a5c155af9199af9e69b889claireho        }
193027f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
193127f654740f2a26ad62a5c155af9199af9e69b889claireho        c = U_SENTINEL;
193227f654740f2a26ad62a5c155af9199af9e69b889claireho    }
193327f654740f2a26ad62a5c155af9199af9e69b889claireho    return c;
193427f654740f2a26ad62a5c155af9199af9e69b889claireho}
193527f654740f2a26ad62a5c155af9199af9e69b889claireho
1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Determines if we are at the start of the data string in the backwards
1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collation iterator
1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator
1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return TRUE if we are at the start
1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool isAtStartPrevIterate(collIterate *data) {
1944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->pos == NULL && data->iterator != NULL) {
1945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return !data->iterator->hasPrevious(data->iterator);
1946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //return (collIter_bos(data)) ||
1948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (data->pos == data->string) ||
1949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru              ((data->flags & UCOL_ITER_INNORMBUF) &&
1950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru              *(data->pos - 1) == 0 && data->fcdPosition == NULL);
1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void goBackOne(collIterate *data) {
1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# if 0
1956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // somehow, it looks like we need to keep iterator synced up
1957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // at all times, as above.
1958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->pos) {
1959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->pos--;
1960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->iterator) {
1962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->iterator->previous(data->iterator);
1963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {
1966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->iterator->previous(data->iterator);
1967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(data->pos) {
1969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->pos --;
1970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inline function that gets a simple CE.
1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* So what it does is that it will first check the expansion buffer. If the
1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* expansion buffer is not empty, ie the end pointer to the expansion buffer
1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* is different from the string pointer, we return the collation element at the
1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* return pointer and decrement it.
1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* For more complicated CEs it resorts to getComplicatedCE.
1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll collator data
1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator struct
1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param status error status
1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               UErrorCode *status)
1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t result = (uint32_t)UCOL_NULLORDER;
1989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (data->offsetReturn != NULL) {
1991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (data->offsetRepeatCount > 0) {
1992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                data->offsetRepeatCount -= 1;
1993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
1994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (data->offsetReturn == data->offsetBuffer) {
1995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                data->offsetReturn = NULL;
199627f654740f2a26ad62a5c155af9199af9e69b889claireho                data->offsetStore  = data->offsetBuffer;
1997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
1998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                data->offsetReturn -= 1;
1999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
2001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
2002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ((data->extendCEs && data->toReturn > data->extendCEs) ||
2004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            (!data->extendCEs && data->toReturn > data->CEs))
2005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
2006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        data->toReturn -= 1;
2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = *(data->toReturn);
2008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (data->CEs == data->toReturn || data->extendCEs == data->toReturn) {
2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->CEpos = data->toReturn;
2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar ch = 0;
2014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2015b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        do {
2016b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /*
2017b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Loop handles case when incremental normalize switches to or from the
2018b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            side buffer / original string, and we need to start again to get the
2019b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            next character.
2020b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            */
2021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            for (;;) {
2022b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (data->flags & UCOL_ITER_HASLEN) {
2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /*
2024b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    Normal path for strings when length is specified.
2025b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    Not in side buffer because it is always null terminated.
2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    */
2027b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (data->pos <= data->string) {
2028b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        /* End of the main source string */
2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return UCOL_NO_MORE_CES;
2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2031b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    data->pos --;
2032b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    ch = *data->pos;
2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2034b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // we are using an iterator to go back. Pray for us!
2035b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                else if (data->flags & UCOL_USE_ITERATOR) {
2036b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  UChar32 iterCh = data->iterator->previous(data->iterator);
2037b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  if(iterCh == U_SENTINEL) {
2038b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return UCOL_NO_MORE_CES;
2039b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  } else {
2040b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    ch = (UChar)iterCh;
2041b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  }
2042b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
2043b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                else {
2044b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    data->pos --;
2045b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    ch = *data->pos;
2046b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    /* we are in the side buffer. */
2047b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (ch == 0) {
2048b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        /*
2049b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        At the start of the normalize side buffer.
2050b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        Go back to string.
2051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        Because pointer points to the last accessed character,
2052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        hence we have to increment it by one here.
2053b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        */
2054b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        data->flags = data->origFlags;
2055b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        data->offsetRepeatValue = 0;
2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                         if (data->fcdPosition == NULL) {
2058b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            data->pos = data->string;
2059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            return UCOL_NO_MORE_CES;
2060b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
2061b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        else {
2062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            data->pos   = data->fcdPosition + 1;
2063b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                       continue;
2066b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
2067b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2069b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(data->flags&UCOL_HIRAGANA_Q) {
2070b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  if(ch>=0x3040 && ch<=0x309f) {
2071b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    data->flags |= UCOL_WAS_HIRAGANA;
2072b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  } else {
2073b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    data->flags &= ~UCOL_WAS_HIRAGANA;
2074b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  }
2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2077b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /*
2078b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                * got a character to determine if there's fcd and/or normalization
2079b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                * stuff to do.
2080b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                * if the current character is not fcd.
2081b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                * if current character is at the start of the string
2082b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                * Trailing combining class == 0.
2083b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                * Note if pos is in the writablebuffer, norm is always 0
2084b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                */
2085b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (ch < ZERO_CC_LIMIT_ ||
2086b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  // this should propel us out of the loop in the iterator case
2087b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    (data->flags & UCOL_ITER_NORM) == 0 ||
2088b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
2089b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    || data->string == data->pos) {
2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2093b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
2094b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    /* if next character is FCD */
2095b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (data->pos == data->string) {
2096b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        /* First char of string is always OK for FCD check */
2097b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        break;
2098b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    /* Not first char of string, do the FCD fast test */
2101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
2102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        break;
2103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
2104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
2105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /* Need a more complete FCD check and possible normalization. */
2107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (collPrevIterFCD(data)) {
2108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    collPrevIterNormalize(data);
2109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
2112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    /*  No normalization. Go ahead and process the char. */
2113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    break;
2114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
2115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
2116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /*
2117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                Some normalization happened.
2118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                Next loop picks up a char from the normalization buffer.
2119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                */
2120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
2122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /* attempt to handle contractions, after removal of the backwards
2123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            contraction
2124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            */
2125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
2126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
2127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
2128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (ch <= 0xFF) {
2129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    result = coll->latinOneMapping[ch];
2130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
2131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                else {
2132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // Always use UCA for [3400..9FFF], [AC00..D7AF]
2133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // **** [FA0E..FA2F] ?? ****
2134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
2135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (ch >= 0x3400 && ch <= 0xD7AF)) {
2136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if (ch > 0x9FFF && ch < 0xAC00) {
2137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            // between the two target ranges; do normal lookup
2138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            // **** this range is YI, Modifier tone letters, ****
2139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
2140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            // **** Latin-D might be tailored, so we need to ****
2141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            // **** do the normal lookup for these guys.     ****
2142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                             result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
2143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        } else {
2144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            result = UCOL_NOT_FOUND;
2145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
2146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    } else {
2147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
2148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    }
2149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                }
2150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (result > UCOL_NOT_FOUND) {
2151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
2152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
2153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (result == UCOL_NOT_FOUND) { // Not found in master list
2154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (!isAtStartPrevIterate(data) &&
2155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        ucol_contractionEndCP(ch, data->coll))
2156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    {
2157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        result = UCOL_CONTRACTION;
2158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
2159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if(coll->UCA) {
2160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
2161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
2162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
2163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (result > UCOL_NOT_FOUND) {
2165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if(coll->UCA) {
2166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
2167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
2168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
2172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(result == UCOL_NOT_FOUND) {
2174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result = getPrevImplicit(ch, data);
2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*   ucol_getPrevCE, out-of-line version for use from other files.  */
2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint32_t  U_EXPORT2
2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getPrevCE(const UCollator *coll, collIterate *data,
2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UErrorCode *status) {
2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ucol_IGetPrevCE(coll, data, status);
2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this should be connected to special Jamo handling */
2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC uint32_t  U_EXPORT2
2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) {
2193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collIterate colIt;
219450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, &u, 1, &colIt, status);
219550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
219650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
219750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
219850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return ucol_IGetNextCE(coll, &colIt, status);
2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument character into the end of the buffer pushing back the
2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* null terminator.
2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate struct data
2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch character to be appended
2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the position of the new addition
2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
220950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline const UChar * insertBufferEnd(collIterate *data, UChar ch)
2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t oldLength = data->writableBuffer.length();
221250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return data->writableBuffer.append(ch).getTerminatedBuffer() + oldLength;
2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument string into the end of the buffer pushing back the
2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* null terminator.
2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collIterate struct data
2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param string to be appended
2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param length of the string to be appended
2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the position of the new addition
2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
222450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline const UChar * insertBufferEnd(collIterate *data, const UChar *str, int32_t length)
2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t oldLength = data->writableBuffer.length();
222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return data->writableBuffer.append(str, length).getTerminatedBuffer() + oldLength;
2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Special normalization function for contraction in the forwards iterator.
2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This normalization sequence will place the current character at source->pos
2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and its following normalized sequence into the buffer.
2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The fcd position, pos will be changed.
2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* pos will now point to positions in the buffer.
2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Flags will be changed accordingly.
2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data
2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizeNextContraction(collIterate *data)
2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
224250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     strsize;
2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status     = U_ZERO_ERROR;
2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* because the pointer points to the next character */
224550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pStart    = data->pos - 1;
224650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pEnd;
2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
224950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->writableBuffer.setTo(*(pStart - 1));
2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        strsize               = 1;
2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
225350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        strsize = data->writableBuffer.length();
2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pEnd = data->fcdPosition;
2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
225850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->writableBuffer.append(
225950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), status));
226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(status)) {
226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
226450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->pos        = data->writableBuffer.getTerminatedBuffer() + strsize;
2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->origFlags  = data->flags;
2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     |= UCOL_ITER_INNORMBUF;
2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Contraction character management function that returns the next character
2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* for the forwards iterator.
2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Does nothing if the next character is in buffer and not the first character
2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* in it.
2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Else it checks next character in data string to see if it is normalizable.
2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* If it is not, the character is simply copied into the buffer, else
2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the whole normalized substring is copied into the buffer, including the
2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* current character.
2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data
2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return next character
2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UChar getNextNormalizedChar(collIterate *data)
2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar  nextch;
2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar  ch;
2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Here we need to add the iterator code. One problem is the way
2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // end of string is handled. If we just return next char, it could
2289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // be the sentinel. Most of the cases already check for this, but we
2290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // need to be sure.
2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ) {
2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         /* if no normalization and not in buffer. */
2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if(data->flags & UCOL_USE_ITERATOR) {
2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         return (UChar)data->iterator->next(data->iterator);
2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      } else {
2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         return *(data->pos ++);
2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      }
2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (data->flags & UCOL_ITER_NORM && data->flags & UCOL_USE_ITERATOR) {
2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //normalizeIterator(data);
2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((innormbuf && *data->pos != 0) ||
2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (data->fcdPosition != NULL && !innormbuf &&
2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->pos < data->fcdPosition)) {
2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if next character is in normalized buffer, no further normalization
2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        is required
2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *(data->pos ++);
2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->flags & UCOL_ITER_HASLEN) {
2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* in data string */
2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (data->pos + 1 == data->endp) {
2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos ++);
2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (innormbuf) {
2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // inside the normalization buffer, but at the end
2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // (since we encountered zero). This means, in the
2325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // case we're using char iterator, that we need to
2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // do another round of normalization.
2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //if(data->origFlags & UCOL_USE_ITERATOR) {
2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // we need to restore original flags,
2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // otherwise, we'll lose them
2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //data->flags = data->origFlags;
2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //normalizeIterator(data);
2332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //return *(data->pos++);
2333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //} else {
2334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
2335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            in writable buffer, at this point fcdPosition can not be
2336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pointing to the end of the data string. see contracting tag.
2337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            */
2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          if(data->fcdPosition) {
2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*(data->fcdPosition + 1) == 0 ||
2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                data->fcdPosition + 1 == data->endp) {
2341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* at the end of the string, dump it into the normalizer */
234250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                data->pos = insertBufferEnd(data, *(data->fcdPosition)) + 1;
2343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Check if data->pos received a null pointer
2344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (data->pos == NULL) {
2345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return (UChar)-1; // Return to indicate error.
2346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
2347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return *(data->fcdPosition ++);
2348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->pos = data->fcdPosition;
2350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          } else if(data->origFlags & UCOL_USE_ITERATOR) {
2351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // if we are here, we're using a normalizing iterator.
2352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // we should just continue further.
2353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->flags = data->origFlags;
2354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->pos = NULL;
2355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return (UChar)data->iterator->next(data->iterator);
2356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          }
2357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //}
2358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
2360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*(data->pos + 1) == 0) {
2361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return *(data->pos ++);
2362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ch = *data->pos ++;
2367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    nextch = *data->pos;
2368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
2370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * if the current character is not fcd.
2371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Trailing combining class == 0.
2372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
2373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->fcdPosition == NULL || data->fcdPosition < data->pos) &&
2374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ ||
2375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) {
2376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
2377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            Need a more complete FCD check and possible normalization.
2378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            normalize substring will be appended to buffer
2379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            */
2380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (collIterFCD(data)) {
2381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            normalizeNextContraction(data);
2382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos ++);
2383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else if (innormbuf) {
2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* fcdposition shifted even when there's no normalization, if we
2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            don't input the rest into this, we'll get the wrong position when
2387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            we reach the end of the writableBuffer */
238850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t length = (int32_t)(data->fcdPosition - data->pos + 1);
238950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            data->pos = insertBufferEnd(data, data->pos - 1, length);
2390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Check if data->pos received a null pointer
2391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (data->pos == NULL) {
2392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return (UChar)-1; // Return to indicate error.
2393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos ++);
2395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (innormbuf) {
2399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        no normalization is to be done hence only one character will be
2401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        appended to the buffer.
2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->pos = insertBufferEnd(data, ch) + 1;
2404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Check if data->pos received a null pointer
2405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (data->pos == NULL) {
2406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return (UChar)-1; // Return to indicate error.
2407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
2408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* points back to the pos in string */
2411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ch;
2412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Function to copy the buffer into writableBuffer and sets the fcd position to
2418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the correct position
2419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param source data string source
2420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param buffer character buffer
2421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
242350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &buffer)
2424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* okay confusing part here. to ensure that the skipped characters are
2426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    considered later, we need to place it in the appropriate position in the
2427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    normalization buffer and reassign the pos pointer. simple case if pos
2428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reside in string, simply copy to normalization buffer and
2429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fcdposition = pos, pos = start of normalization buffer. if pos in
2430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    normalization buffer, we'll insert the copy infront of pos and point pos
2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    to the start of the normalization buffer. why am i doing these copies?
2432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    well, so that the whole chunk of codes in the getNextCE, ucol_prv_getSpecialCE does
2433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    not require any changes, which be really painful. */
2434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (source->flags & UCOL_ITER_INNORMBUF) {
243550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t replaceLength = source->pos - source->writableBuffer.getBuffer();
243650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        source->writableBuffer.replace(0, replaceLength, buffer);
2437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source->fcdPosition  = source->pos;
2440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source->origFlags    = source->flags;
2441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source->flags       |= UCOL_ITER_INNORMBUF;
2442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source->flags       &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
244350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        source->writableBuffer = buffer;
2444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
244650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    source->pos = source->writableBuffer.getTerminatedBuffer();
2447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Function to get the discontiguos collation element within the source.
2451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Note this function will set the position to the appropriate places.
2452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param coll current collator used
2453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param source data string source
2454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param constart index to the start character in the contraction table
2455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return discontiguos collation element offset
2456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t getDiscontiguous(const UCollator *coll, collIterate *source,
2459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                const UChar *constart)
2460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* source->pos currently points to the second combining character after
2462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       the start character */
246350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          const UChar *temppos      = source->pos;
246450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          UnicodeString buffer;
2465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar   *tempconstart = constart;
2466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          uint8_t  tempflags    = source->flags;
2467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          UBool    multicontraction = FALSE;
2468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          collIterateState discState;
2469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          backupState(source, &discState);
2471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
247227f654740f2a26ad62a5c155af9199af9e69b889claireho    buffer.setTo(peekCodePoint(source, -1));
2473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
2474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar    *UCharOffset;
2475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar     schar,
2476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  tchar;
2477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t  result;
2478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp)
248027f654740f2a26ad62a5c155af9199af9e69b889claireho            || (peekCodeUnit(source, 0) == 0  &&
2481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //|| (*source->pos == 0  &&
2482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ((source->flags & UCOL_ITER_INNORMBUF) == 0 ||
2483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 source->fcdPosition == NULL ||
2484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 source->fcdPosition == source->endp ||
2485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 *(source->fcdPosition) == 0 ||
2486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 u_getCombiningClass(*(source->fcdPosition)) == 0)) ||
2487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 /* end of string in null terminated string or stopped by a
2488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 null character, note fcd does not always point to a base
2489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 character after the discontiguos change */
249027f654740f2a26ad62a5c155af9199af9e69b889claireho                 u_getCombiningClass(peekCodePoint(source, 0)) == 0) {
2491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 //u_getCombiningClass(*(source->pos)) == 0) {
2492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //constart = (UChar *)coll->image + getContractOffset(CE);
2493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (multicontraction) {
2494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                source->pos    = temppos - 1;
249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                setDiscontiguosAttribute(source, buffer);
2496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return *(coll->contractionCEs +
2497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    (tempconstart - coll->contractionIndex));
2498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            constart = tempconstart;
2500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UCharOffset = (UChar *)(tempconstart + 1); /* skip the backward offset*/
2504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        schar = getNextNormalizedChar(source);
2505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (schar > (tchar = *UCharOffset)) {
2507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UCharOffset++;
2508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (schar != tchar) {
2511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* not the correct codepoint. we stuff the current codepoint into
2512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            the discontiguos buffer and try the next character */
251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer.append(schar);
2514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
2515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
2517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (u_getCombiningClass(schar) ==
251827f654740f2a26ad62a5c155af9199af9e69b889claireho                u_getCombiningClass(peekCodePoint(source, -2))) {
251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                buffer.append(schar);
2520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
2521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = *(coll->contractionCEs +
2523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      (UCharOffset - coll->contractionIndex));
2524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (result == UCOL_NOT_FOUND) {
2527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          break;
2528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (isContraction(result)) {
2529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* this is a multi-contraction*/
2530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tempconstart = (UChar *)coll->image + getContractOffset(result);
2531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*(coll->contractionCEs + (constart - coll->contractionIndex))
2532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                != UCOL_NOT_FOUND) {
2533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                multicontraction = TRUE;
2534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                temppos       = source->pos + 1;
2535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            setDiscontiguosAttribute(source, buffer);
2538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return result;
2539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* no problems simply reverting just like that,
2543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if we are in string before getting into this function, points back to
2544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string hence no problem.
2545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if we are in normalization buffer before getting into this function,
2546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    since we'll never use another normalization within this function, we
2547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    know that fcdposition points to a base character. the normalization buffer
2548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    never change, hence this revert works. */
2549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    loadState(source, &discState, TRUE);
2550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    goBackOne(source);
2551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //source->pos   = temppos - 1;
2553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source->flags = tempflags;
2554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *(coll->contractionCEs + (constart - coll->contractionIndex));
2555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* now uses Mark's getImplicitPrimary code */
2558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) {
2560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t r = uprv_uca_getImplicitPrimary(cp);
2561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) | 0x000000C0;
2562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->offsetRepeatCount += 1;
2563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (r & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order'
2564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Inserts the argument character into the front of the buffer replacing the
2568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* front null terminator.
2569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data
2570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch character to be appended
2571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
257350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline void insertBufferFront(collIterate *data, UChar ch)
2574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
257550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->pos = data->writableBuffer.setCharAt(0, ch).insert(0, (UChar)0).getTerminatedBuffer() + 2;
2576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Special normalization function for contraction in the previous iterator.
2580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This normalization sequence will place the current character at source->pos
2581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and its following normalized sequence into the buffer.
2582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The fcd position, pos will be changed.
2583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* pos will now point to positions in the buffer.
2584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Flags will be changed accordingly.
2585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation iterator data
2586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline void normalizePrevContraction(collIterate *data, UErrorCode *status)
2589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
259050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pEnd = data->pos + 1;         /* End normalize + 1 */
259150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pStart;
2592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString endOfBuffer;
2594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->flags & UCOL_ITER_HASLEN) {
2595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        normalization buffer not used yet, we'll pull down the next
2597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        character into the end of the buffer
2598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
259950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        endOfBuffer.setTo(*pEnd);
2600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
260250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        endOfBuffer.setTo(data->writableBuffer, 1);  // after the leading NUL
2603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->fcdPosition == NULL) {
2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pStart = data->string;
2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pStart = data->fcdPosition + 1;
2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t normLen =
261250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)),
261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             data->writableBuffer,
261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             *status).
261550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length();
261650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
261750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    this puts the null termination infront of the normalized string instead
2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    of the end
2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
262350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    data->pos =
262450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminatedBuffer() +
262550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        1 + normLen;
2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->origFlags  = data->flags;
2627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     |= UCOL_ITER_INNORMBUF;
2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
2629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Contraction character management function that returns the previous character
2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* for the backwards iterator.
2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Does nothing if the previous character is in buffer and not the first
2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* character in it.
2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Else it checks previous character in data string to see if it is
2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* normalizable.
2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* If it is not, the character is simply copied into the buffer, else
2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the whole normalized substring is copied into the buffer, including the
2640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* current character.
2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param data collation element iterator data
2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return previous character
2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status)
2646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar  prevch;
2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar  ch;
264950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *start;
2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ||
2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (innormbuf && *(data->pos - 1) != 0)) {
2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if no normalization.
2655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if previous character is in normalized buffer, no further normalization
2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        is required
2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if(data->flags & UCOL_USE_ITERATOR) {
2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->iterator->move(data->iterator, -1, UITER_CURRENT);
2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (UChar)data->iterator->next(data->iterator);
2661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      } else {
2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *(data->pos - 1);
2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      }
2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    start = data->pos;
2667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ((data->fcdPosition==NULL)||(data->flags & UCOL_ITER_HASLEN)) {
2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* in data string */
2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((start - 1) == data->string) {
2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(start - 1);
2671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        start --;
2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ch     = *start;
2674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prevch = *(start - 1);
2675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        in writable buffer, at this point fcdPosition can not be NULL.
2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        see contracting tag.
2680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
2681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (data->fcdPosition == data->string) {
2682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* at the start of the string, just dump it into the normalizer */
268350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            insertBufferFront(data, *(data->fcdPosition));
2684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            data->fcdPosition = NULL;
2685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos - 1);
2686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        start  = data->fcdPosition;
2688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ch     = *start;
2689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prevch = *(start - 1);
2690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
2692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * if the current character is not fcd.
2693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Trailing combining class == 0.
2694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
2695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (data->fcdPosition > start &&
2696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       (ch >= NFC_ZERO_CC_BLOCK_LIMIT_ || prevch >= NFC_ZERO_CC_BLOCK_LIMIT_))
2697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
2698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
2699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        Need a more complete FCD check and possible normalization.
2700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        normalize substring will be appended to buffer
2701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        */
270250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *backuppos = data->pos;
2703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->pos = start;
2704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (collPrevIterFCD(data)) {
2705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            normalizePrevContraction(data, status);
2706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *(data->pos - 1);
2707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->pos = backuppos;
2709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->fcdPosition ++;
2710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (innormbuf) {
2713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
2714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    no normalization is to be done hence only one character will be
2715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    appended to the buffer.
2716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
271750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        insertBufferFront(data, ch);
2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        data->fcdPosition --;
2719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ch;
2722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This function handles the special CEs like contractions, expansions, surrogates, Thai */
2725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* It is called by getNextCE */
2726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* The following should be even */
/* NOTE(review): the reason for the evenness requirement is not visible near
 * this definition; presumably the consumer handles digits in pairs - confirm
 * against the code that uses this macro before changing the value. */
#define UCOL_MAX_DIGITS_FOR_NUMBER 254
2729b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
2730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, collIterate *source, UErrorCode *status) {
2731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collIterateState entryState;
2732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    backupState(source, &entryState);
2733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 cp = ch;
2734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for (;;) {
2736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // This loop will repeat only in the case of contractions, and only when a contraction
2737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   is found and the first CE resulting from that contraction is itself a special
2738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   (an expansion, for example.)  All other special CE types are fully handled the
2739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   first time through, and the loop exits.
2740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        const uint32_t *CEOffset = NULL;
2742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        switch(getCETag(CE)) {
2743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case NOT_FOUND_TAG:
2744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* This one is not found, and we'll let somebody else bother about it... no more games */
2745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return CE;
2746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case SPEC_PROC_TAG:
2747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
2748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Special processing is getting a CE that is preceded by a certain prefix
2749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Currently this is only needed for optimizing Japanese length and iteration marks.
                // When we encounter a special processing tag, we go backwards and try to see if
2751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // we have a match.
2752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Contraction tables are used - so the whole process is not unlike contraction.
2753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // prefix data is stored backwards in the table.
2754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                const UChar *UCharOffset;
2755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar schar, tchar;
2756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collIterateState prefixState;
2757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                backupState(source, &prefixState);
2758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                loadState(source, &entryState, TRUE);
2759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goBackOne(source); // We want to look at the point where we entered - actually one
2760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // before that...
2761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for(;;) {
2763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // This loop will run once per source string character, for as long as we
2764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //  are matching a potential contraction sequence
2765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
                    // First we position ourselves at the beginning of the contraction sequence
2767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
2768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (collIter_bos(source)) {
2769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
2770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    schar = getPrevNormalizedChar(source, status);
2773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goBackOne(source);
2774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
2776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCharOffset++;
2777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (schar == tchar) {
2780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Found the source string char in the table.
2781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  Pick up the corresponding CE from the table.
2782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
2783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (UCharOffset - coll->contractionIndex));
2784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else
2786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    {
2787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Source string char was not in the table.
2788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   We have not found the prefix.
2789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
2790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (ContractionStart - coll->contractionIndex));
2791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isPrefix(CE)) {
2794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // The source string char was in the contraction table, and the corresponding
2795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   CE is not a prefix CE.  We found the prefix, break
2796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   out of loop, this CE will end up being returned.  This is the normal
2797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   way out of prefix handling when the source actually contained
2798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   the prefix.
2799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
2802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE != UCOL_NOT_FOUND) { // we found something and we can merilly continue
2803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    loadState(source, &prefixState, TRUE);
2804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(source->origFlags & UCOL_USE_ITERATOR) {
2805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->flags = source->origFlags;
2806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { // prefix search was a failure, we have to backup all the way to the start
2808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    loadState(source, &entryState, TRUE);
2809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
2811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CONTRACTION_TAG:
2813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
2814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* This should handle contractions */
2815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collIterateState state;
2816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                backupState(source, &state);
2817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t firstCE = *(coll->contractionCEs + ((UChar *)coll->image+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND;
2818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                const UChar *UCharOffset;
2819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar schar, tchar;
2820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for (;;) {
2822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* This loop will run once per source string character, for as long as we     */
2823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*  are matching a potential contraction sequence                  */
2824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
                    /* First we position ourselves at the beginning of the contraction sequence */
2826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
2827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (collIter_eos(source)) {
2829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Ran off the end of the source string.
2830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
2831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // So we'll pick whatever we have at the point...
2832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (CE == UCOL_NOT_FOUND) {
2833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // back up the source over all the chars we scanned going into this contraction.
2834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            CE = firstCE;
2835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            loadState(source, &state, TRUE);
2836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(source->origFlags & UCOL_USE_ITERATOR) {
2837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                source->flags = source->origFlags;
2838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
2839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
2840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint8_t maxCC = (uint8_t)(*(UCharOffset)&0xFF); /*get the discontiguos stuff */ /* skip the backward offset, see above */
2844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8);
2845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    schar = getNextNormalizedChar(source);
2847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
2848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCharOffset++;
2849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (schar == tchar) {
2852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Found the source string char in the contraction table.
2853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  Pick up the corresponding CE from the table.
2854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
2855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (UCharOffset - coll->contractionIndex));
2856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else
2858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    {
2859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Source string char was not in contraction table.
2860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   Unless we have a discontiguous contraction, we have finished
2861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   with this contraction.
2862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // in order to do the proper detection, we
2863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // need to see if we're dealing with a supplementary
2864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* We test whether the next two char are surrogate pairs.
2865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        * This test is done if the iterator is not NULL.
2866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        * If there is no surrogate pair, the iterator
2867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        * goes back one if needed. */
2868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UChar32 miss = schar;
2869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (source->iterator) {
2870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UChar32 surrNextChar; /* the next char in the iteration to test */
2871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            int32_t prevPos; /* holds the previous position before move forward of the source iterator */
2872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(U16_IS_LEAD(schar) && source->iterator->hasNext(source->iterator)) {
2873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                prevPos = source->iterator->index;
2874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                surrNextChar = getNextNormalizedChar(source);
2875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (U16_IS_TRAIL(surrNextChar)) {
2876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    miss = U16_GET_SUPPLEMENTARY(schar, surrNextChar);
2877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else if (prevPos < source->iterator->index){
2878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    goBackOne(source);
2879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
2880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
2881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if (U16_IS_LEAD(schar)) {
2882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source));
2883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
2884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        uint8_t sCC;
2886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (miss < 0x300 ||
2887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            maxCC == 0 ||
2888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (sCC = i_getCombiningClass(miss, coll)) == 0 ||
2889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sCC>maxCC ||
2890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (allSame != 0 && sCC == maxCC) ||
2891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            collIter_eos(source))
2892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        {
2893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //  Contraction can not be discontiguous.
2894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goBackOne(source);  // back up the source string by one,
2895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //  because  the character we just looked at was
2896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //  not part of the contraction.   */
2897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(U_IS_SUPPLEMENTARY(miss)) {
2898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goBackOne(source);
2899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
2900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            CE = *(coll->contractionCEs +
2901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                (ContractionStart - coll->contractionIndex));
2902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
2903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //
2904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // Contraction is possibly discontiguous.
2905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //   Scan more of source string looking for a match
2906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //
2907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UChar tempchar;
2908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* find the next character if schar is not a base character
2909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            and we are not yet at the end of the string */
2910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tempchar = getNextNormalizedChar(source);
2911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // probably need another supplementary thingie here
2912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goBackOne(source);
2913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (i_getCombiningClass(tempchar, coll) == 0) {
2914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goBackOne(source);
2915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if(U_IS_SUPPLEMENTARY(miss)) {
2916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    goBackOne(source);
2917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
2918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                /* Spit out the last char of the string, wasn't tasty enough */
2919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                CE = *(coll->contractionCEs +
2920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    (ContractionStart - coll->contractionIndex));
2921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
2922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                CE = getDiscontiguous(coll, source, ContractionStart);
2923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
2924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
2925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } // else after if(schar == tchar)
2926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE == UCOL_NOT_FOUND) {
2928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /* The Source string did not match the contraction that we were checking.  */
2929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /*  Back up the source position to undo the effects of having partially    */
2930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /*   scanned through what ultimately proved to not be a contraction.       */
2931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        loadState(source, &state, TRUE);
2932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = firstCE;
2933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isContraction(CE)) {
2937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // The source string char was in the contraction table, and the corresponding
2938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   CE is not a contraction CE.  We completed the contraction, break
2939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   out of loop, this CE will end up being returned.  This is the normal
2940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   way out of contraction handling when the source actually contained
2941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   the contraction.
2942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
2943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // The source string char was in the contraction table, and the corresponding
2947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   CE IS a contraction CE.  We will continue looping to check the source
2948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   string for the remaining chars in the contraction.
2949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));
2950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tempCE != UCOL_NOT_FOUND) {
2951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // We have scanned a section of source string for which there is a
2952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  CE from the contraction table.  Remember the CE and scan position, so
2953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  that we can return to this point if further scanning fails to
2954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  match a longer contraction sequence.
2955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        firstCE = tempCE;
2956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goBackOne(source);
2958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        backupState(source, &state);
2959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        getNextNormalizedChar(source);
2960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Another way to do this is:
2962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //collIterateState tempState;
2963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //backupState(source, &tempState);
2964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //goBackOne(source);
2965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //backupState(source, &state);
2966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //loadState(source, &tempState, TRUE);
2967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // The problem is that for incomplete contractions we have to remember the previous
2969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // position. Before, the only thing I needed to do was state.pos--;
2970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // After iterator introduction and especially after introduction of normalizing
2971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // iterators, it became much more difficult to decrease the saved state.
2972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // I'm not yet sure which of the two methods above is faster.
2973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
2974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } // for(;;)
2975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
2976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } // case CONTRACTION_TAG:
2977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case LONG_PRIMARY_TAG:
2978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
2979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
2980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
2981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->offsetRepeatCount += 1;
2982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return CE;
2983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case EXPANSION_TAG:
2985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
2986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* This should handle expansion. */
2987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* NOTE: we can encounter both continuations and expansions in an expansion! */
2988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* I have to decide where continuations are going to be dealt with */
2989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t size;
2990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t i;    /* general counter */
2991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
2993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                size = getExpansionCount(CE);
2994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = *CEOffset++;
299527f654740f2a26ad62a5c155af9199af9e69b889claireho              //source->offsetRepeatCount = -1;
2996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
2998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    for(i = 1; i<size; i++) {
2999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = *CEOffset++;
300027f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->offsetRepeatCount += 1;
3001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* else, we do */
3003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(*CEOffset != 0) {
3004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = *CEOffset++;
300527f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->offsetRepeatCount += 1;
3006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return CE;
3010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case DIGIT_TAG:
3012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
3014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                We do a check to see if we want to collate digits as numbers; if so we generate
3015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                a custom collation key. Otherwise we pull out the value stored in the expansion table.
3016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
3017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //uint32_t size;
3018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t i;    /* general counter */
3019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->coll->numericCollation == UCOL_ON){
3021b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    collIterateState digitState = {0,0,0,0,0,0,0,0,0};
3022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UChar32 char32 = 0;
3023b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    int32_t digVal = 0;
3024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t digIndx = 0;
3026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t endIndex = 0;
3027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t trailingZeroIndex = 0;
3028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint8_t collateVal = 0;
3030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UBool nonZeroValReached = FALSE;
3032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3033b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3]; // I just need a temporary place to store my generated CEs.
3034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                         We parse the source string until we hit a char that's NOT a digit.
3036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        Use this u_charDigitValue. This might be slow because we have to
3037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        handle surrogates...
3038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /*
3040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (U16_IS_LEAD(ch)){
3041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                      if (!collIter_eos(source)) {
3042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        backupState(source, &digitState);
3043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UChar trail = getNextNormalizedChar(source);
3044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(U16_IS_TRAIL(trail)) {
3045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                          char32 = U16_GET_SUPPLEMENTARY(ch, trail);
3046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
3047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                          loadState(source, &digitState, TRUE);
3048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                          char32 = ch;
3049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                      } else {
3051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        char32 = ch;
3052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                      }
3053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
3054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      char32 = ch;
3055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    digVal = u_charDigitValue(char32);
3057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            */
3058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    digVal = u_charDigitValue(cp); // if we have arrived here, we have
3059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // already processed possible supplementaries that triggered the digit tag -
3060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // all supplementaries are marked in the UCA.
3061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        We  pad a zero in front of the first element anyways. This takes
3063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        care of the (probably) most common case where people are sorting things followed
3064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        by a single digit
3065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    digIndx++;
3067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    for(;;){
3068b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // Make sure we have enough space. No longer needed;
3069b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // at this point digIndx now has a max value of UCOL_MAX_DIGITS_FOR_NUMBER
3070b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // (it has been pre-incremented) so we just ensure that numTempBuf is big enough
3071b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3).
3072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Skipping over leading zeroes.
3074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (digVal != 0) {
3075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            nonZeroValReached = TRUE;
3076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (nonZeroValReached) {
3078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /*
3079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            We parse the digit string into base 100 numbers (this fits into a byte).
3080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            We only add to the buffer in twos, thus if we are parsing an odd character,
3081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            that serves as the 'tens' digit, while if we are parsing an even one, that
3082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            is the 'ones' digit. We dump the parsed base 100 value (collateVal) into
3083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            a buffer. We multiply each collateVal by 2 (to give us room) and add 6 (to avoid
3084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
3085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            than all the other bytes.
3086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            */
3087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (digIndx % 2 == 1){
3089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                collateVal += (uint8_t)digVal;
3090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // We don't enter the low-order-digit case unless we've already seen
3092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // the high order, or for the first digit, which is always non-zero.
3093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (collateVal != 0)
3094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    trailingZeroIndex = 0;
3095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
3097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                collateVal = 0;
3098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            else{
3100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // We drop the collation value into the buffer so if we need to do
3101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // a "front patch" we don't have to check to see if we're hitting the
3102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // last element.
3103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                collateVal = (uint8_t)(digVal * 10);
3104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // Check for trailing zeroes.
3106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (collateVal == 0)
3107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                {
3108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if (!trailingZeroIndex)
3109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        trailingZeroIndex = (digIndx/2) + 2;
3110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                else
3112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    trailingZeroIndex = 0;
3113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
3115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            digIndx++;
3117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Get next character.
3120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (!collIter_eos(source)){
3121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            ch = getNextNormalizedChar(source);
3122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (U16_IS_LEAD(ch)){
3123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (!collIter_eos(source)) {
3124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    backupState(source, &digitState);
3125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    UChar trail = getNextNormalizedChar(source);
3126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(U16_IS_TRAIL(trail)) {
3127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        char32 = U16_GET_SUPPLEMENTARY(ch, trail);
3128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    } else {
3129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        loadState(source, &digitState, TRUE);
3130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        char32 = ch;
3131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
3132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
3134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                char32 = ch;
3135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if ((digVal = u_charDigitValue(char32)) == -1 || digIndx > UCOL_MAX_DIGITS_FOR_NUMBER){
3138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // Resetting position to point to the next unprocessed char. We
3139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                // overshot it when doing our test/set for numbers.
3140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if (char32 > 0xFFFF) { // For surrogates.
3141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    loadState(source, &digitState, TRUE);
3142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    //goBackOne(source);
3143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goBackOne(source);
3145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                break;
3146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
3148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            break;
3149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (nonZeroValReached == FALSE){
3153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        digIndx = 2;
3154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        numTempBuf[2] = 6;
3155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;
3158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (digIndx % 2 != 0){
3159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        /*
3160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        We missed a value. Since digIndx isn't even, we stuck too many values into the buffer (this is what
3161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward.
3162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a
3163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        single pass and optimizes for strings with single digits. I'm just assuming that's the more common case.
3164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        */
3165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        for(i = 2; i < endIndex; i++){
3167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            numTempBuf[i] =     (((((numTempBuf[i] - 6)/2) % 10) * 10) +
3168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
3169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        --digIndx;
3171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Subtract one off of the last byte.
3174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[endIndex-1] -= 1;
3175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    We want to skip over the first two slots in the buffer. The first slot
3178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
3179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
3180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
3182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
3183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Now transfer the collation key to our collIterate struct.
3185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
3186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //size = ((endIndex+1) & ~1)/2;
3187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
3188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
3189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_BYTE_COMMON; // Tertiary weight.
3190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    i = 2; // Reset the index into the buffer.
3191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(i < endIndex)
3192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    {
3193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        uint32_t primWeight = numTempBuf[i++] << 8;
3194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if ( i < endIndex)
3195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            primWeight |= numTempBuf[i++];
3196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
3197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // no numeric mode, we'll just switch to whatever we stashed and continue
3201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
3202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = *CEOffset++;
3203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
3204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return CE;
3206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* various implicits optimization */
3208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case IMPLICIT_TAG:        /* everything that is not defined otherwise */
3209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* UCA is filled with these. Tailorings are NOT_FOUND */
3210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return getImplicit(cp, source);
3211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
3212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImplicit
3213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return getImplicit(cp, source);
3214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
3215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t
3217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
3218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t LCount = 19;
3219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t VCount = 21;
3220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t TCount = 28;
3221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t NCount = VCount * TCount;   // 588
3222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t SCount = LCount * NCount;   // 11172
3223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t L = ch - SBase;
3224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // divide into pieces
3226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t T = L % TCount; // we do it in this order since some compilers can do % and / in one operation
3228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L /= TCount;
3229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t V = L % VCount;
3230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L /= VCount;
3231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // offset them
3233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L += LBase;
3235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                V += VBase;
3236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                T += TBase;
3237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // return the first CE, but first put the rest into the expansion buffer
3239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (!source->coll->image->jamoSpecial) { // FAST PATH
3240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
3242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (T != TBase) {
3243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
3244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
3247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { // Jamo is Special
3249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Since Hanguls pass the FCD check, it is
3250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // guaranteed that we won't be in
3251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // the normalization buffer if something like this happens
3252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
3253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // However, if we are using a uchar iterator and normalization
3254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // is ON, the Hangul that lead us here is going to be in that
3255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // normalization buffer. Here we want to restore the uchar
3256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // iterator state and pull out of the normalization buffer
3257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) {
3258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->flags = source->origFlags; // restore the iterator
3259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->pos = NULL;
3260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
3262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Move Jamos into normalization buffer
326350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar *buffer = source->writableBuffer.getBuffer(4);
326450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t bufferLength;
326550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    buffer[0] = (UChar)L;
326650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    buffer[1] = (UChar)V;
3267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (T != TBase) {
326850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        buffer[2] = (UChar)T;
326950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        bufferLength = 3;
3270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
327150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        bufferLength = 2;
3272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
327350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    source->writableBuffer.releaseBuffer(bufferLength);
3274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // Indicate where to continue in main input string after exhausting the writableBuffer
3276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    source->fcdPosition       = source->pos;
3277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    source->pos   = source->writableBuffer.getTerminatedBuffer();
3279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->origFlags   = source->flags;
3280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->flags       |= UCOL_ITER_INNORMBUF;
3281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->flags       &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return(UCOL_IGNORABLE);
3284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case SURROGATE_TAG:
3287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* we encountered a leading surrogate. We shall get the CE by using the following code unit */
3288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* two things can happen here: next code point can be a trailing surrogate - we will use it */
3289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* to retrieve the CE, or it is not a trailing surrogate (or the string is done). In that case */
329027f654740f2a26ad62a5c155af9199af9e69b889claireho            /* we treat it like an unassigned code point. */
3291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar trail;
3293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collIterateState state;
3294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                backupState(source, &state);
3295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
3296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // we chould have stepped one char forward and it might have turned that it
3297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // was not a trail surrogate. In that case, we have to backup.
3298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    loadState(source, &state, TRUE);
329927f654740f2a26ad62a5c155af9199af9e69b889claireho                    return UCOL_NOT_FOUND;
3300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* TODO: CE contain the data from the previous CE + the mask. It should at least be unmasked */
3302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFFFF, trail);
3303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates in this block, but not this one.
3304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // We need to backup
3305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        loadState(source, &state, TRUE);
3306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return CE;
3307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // calculate the supplementary code point value, if surrogate was not tailored
3309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
3310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
3313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case LEAD_SURROGATE_TAG:  /* D800-DBFF*/
3314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UChar nextChar;
3315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if( source->flags & UCOL_USE_ITERATOR) {
3316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source->iterator))) {
3317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
3318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->iterator->next(source->iterator);
3319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return getImplicit(cp, source);
3320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) || (source->pos<source->endp)) &&
332227f654740f2a26ad62a5c155af9199af9e69b889claireho                      U_IS_TRAIL((nextChar=*source->pos))) {
332327f654740f2a26ad62a5c155af9199af9e69b889claireho                cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
332427f654740f2a26ad62a5c155af9199af9e69b889claireho                source->pos++;
332527f654740f2a26ad62a5c155af9199af9e69b889claireho                return getImplicit(cp, source);
3326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
332727f654740f2a26ad62a5c155af9199af9e69b889claireho            return UCOL_NOT_FOUND;
3328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
332927f654740f2a26ad62a5c155af9199af9e69b889claireho            return UCOL_NOT_FOUND; /* broken surrogate sequence */
3330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CHARSET_TAG:
3331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* not yet implemented */
3332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* probably after 1.8 */
3333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return UCOL_NOT_FOUND;
3334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        default:
3335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_INTERNAL_PROGRAM_ERROR;
3336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE=0;
3337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
3338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (CE <= UCOL_NOT_FOUND) break;
3340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return CE;
3342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* now uses Mark's getImplicitPrimary code */
3346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
3347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {
3348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t r = uprv_uca_getImplicitPrimary(cp);
3349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505;
3351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collationSource->toReturn = collationSource->CEpos;
3352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
335327f654740f2a26ad62a5c155af9199af9e69b889claireho    // **** doesn't work if using iterator ****
335427f654740f2a26ad62a5c155af9199af9e69b889claireho    if (collationSource->flags & UCOL_ITER_INNORMBUF) {
335527f654740f2a26ad62a5c155af9199af9e69b889claireho        collationSource->offsetRepeatCount = 1;
335627f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
335727f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t firstOffset = (int32_t)(collationSource->pos - collationSource->string);
3358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
335927f654740f2a26ad62a5c155af9199af9e69b889claireho        UErrorCode errorCode = U_ZERO_ERROR;
336027f654740f2a26ad62a5c155af9199af9e69b889claireho        collationSource->appendOffset(firstOffset, errorCode);
336127f654740f2a26ad62a5c155af9199af9e69b889claireho        collationSource->appendOffset(firstOffset + 1, errorCode);
3362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
336327f654740f2a26ad62a5c155af9199af9e69b889claireho        collationSource->offsetReturn = collationSource->offsetStore - 1;
336427f654740f2a26ad62a5c155af9199af9e69b889claireho        *(collationSource->offsetBuffer) = firstOffset;
336527f654740f2a26ad62a5c155af9199af9e69b889claireho        if (collationSource->offsetReturn == collationSource->offsetBuffer) {
336627f654740f2a26ad62a5c155af9199af9e69b889claireho            collationSource->offsetStore = collationSource->offsetBuffer;
336727f654740f2a26ad62a5c155af9199af9e69b889claireho        }
336827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
3369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return ((r & 0x0000FFFF)<<16) | 0x000000C0;
3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
3374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function handles the special CEs like contractions, expansions,
3375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * surrogates, Thai.
3376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is called by both getPrevCE
3377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
3378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
3379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          collIterate *source,
3380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          UErrorCode *status)
3381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
3382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const uint32_t *CEOffset    = NULL;
3383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          UChar    *UCharOffset = NULL;
3384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          UChar    schar;
3385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UChar    *constart    = NULL;
3386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          uint32_t size;
3387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          UChar    buffer[UCOL_MAX_BUFFER];
3388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          uint32_t *endCEBuffer;
3389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          UChar   *strbuffer;
3390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          int32_t noChars = 0;
3391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          int32_t CECount = 0;
3392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for(;;)
3394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
3395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* the only ces that loops are thai and contractions */
3396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        switch (getCETag(CE))
3397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
3398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case NOT_FOUND_TAG:  /* this tag always returns */
3399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return CE;
3400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case SPEC_PROC_TAG:
3402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Special processing is getting a CE that is preceded by a certain prefix
3404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Currently this is only needed for optimizing Japanese length and iteration marks.
3405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // When we encouter a special processing tag, we go backwards and try to see if
3406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // we have a match.
3407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Contraction tables are used - so the whole process is not unlike contraction.
3408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // prefix data is stored backwards in the table.
3409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                const UChar *UCharOffset;
3410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar schar, tchar;
3411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                collIterateState prefixState;
3412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                backupState(source, &prefixState);
3413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for(;;) {
3414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // This loop will run once per source string character, for as long as we
3415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //  are matching a potential contraction sequence
3416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // First we position ourselves at the begining of contraction sequence
3418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
3419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (collIter_bos(source)) {
3421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
3422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
3423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    schar = getPrevNormalizedChar(source, status);
3425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goBackOne(source);
3426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
3428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCharOffset++;
3429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (schar == tchar) {
3432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Found the source string char in the table.
3433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //  Pick up the corresponding CE from the table.
3434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
3435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (UCharOffset - coll->contractionIndex));
3436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else
3438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    {
3439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // if there is a completely ignorable code point in the middle of
3440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // a prefix, we need to act as if it's not there
3441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero)
3442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // lone surrogates cannot be set to zero as it would break other processing
3443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
3444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // it's easy for BMP code points
3445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(isZeroCE == 0) {
3446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
344727f654740f2a26ad62a5c155af9199af9e69b889claireho                        } else if(U16_IS_SURROGATE(schar)) {
3448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // for supplementary code points, we have to check the next one
3449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // situations where we are going to ignore
3450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // 1. beginning of the string: schar is a lone surrogate
3451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // 2. schar is a lone surrogate
3452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            // 3. schar is a trail surrogate in a valid surrogate sequence
3453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //    that is explicitly set to zero.
3454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (!collIter_bos(source)) {
3455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                UChar lead;
345627f654740f2a26ad62a5c155af9199af9e69b889claireho                                if(!U16_IS_SURROGATE_LEAD(schar) && U16_IS_LEAD(lead = getPrevNormalizedChar(source, status))) {
3457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, lead);
345827f654740f2a26ad62a5c155af9199af9e69b889claireho                                    if(isSpecial(isZeroCE) && getCETag(isZeroCE) == SURROGATE_TAG) {
3459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar);
3460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        if(finalCE == 0) {
3461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                            // this is a real, assigned completely ignorable code point
3462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                            goBackOne(source);
3463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                            continue;
3464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        }
3465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
3466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
346727f654740f2a26ad62a5c155af9199af9e69b889claireho                                    // lone surrogate, treat like unassigned
346827f654740f2a26ad62a5c155af9199af9e69b889claireho                                    return UCOL_NOT_FOUND;
3469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3470c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
347127f654740f2a26ad62a5c155af9199af9e69b889claireho                                // lone surrogate at the beggining, treat like unassigned
347227f654740f2a26ad62a5c155af9199af9e69b889claireho                                return UCOL_NOT_FOUND;
3473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
3475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Source string char was not in the table.
3476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   We have not found the prefix.
3477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
3478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (ContractionStart - coll->contractionIndex));
3479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isPrefix(CE)) {
3482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // The source string char was in the contraction table, and the corresponding
3483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   CE is not a prefix CE.  We found the prefix, break
3484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   out of loop, this CE will end up being returned.  This is the normal
3485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   way out of prefix handling when the source actually contained
3486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        //   the prefix.
3487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
3488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                loadState(source, &prefixState, TRUE);
3491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
3492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
349450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case CONTRACTION_TAG: {
3495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* to ensure that the backwards and forwards iteration matches, we
3496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            take the current region of most possible match and pass it through
3497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            the forward iteration. this will ensure that the obstinate problem of
3498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            overlapping contractions will not occur.
3499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            */
350027f654740f2a26ad62a5c155af9199af9e69b889claireho            schar = peekCodeUnit(source, 0);
3501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            constart = (UChar *)coll->image + getContractOffset(CE);
3502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (isAtStartPrevIterate(source)
3503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* commented away contraction end checks after adding the checks
3504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                in getPrevCE  */) {
3505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* start of string or this is not the end of any contraction */
3506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = *(coll->contractionCEs +
3507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        (constart - coll->contractionIndex));
3508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
3509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            strbuffer = buffer;
3511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1);
3512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *(UCharOffset --) = 0;
3513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            noChars = 0;
3514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // have to swap thai characters
3515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while (ucol_unsafeCP(schar, coll)) {
3516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(UCharOffset) = schar;
3517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                noChars++;
3518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UCharOffset --;
3519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                schar = getPrevNormalizedChar(source, status);
3520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goBackOne(source);
3521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // TODO: when we exhaust the contraction buffer,
3522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // it needs to get reallocated. The problem is
3523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // that the size depends on the string which is
3524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // not iterated over. However, since we're travelling
3525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // backwards, we already had to set the iterator at
3526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // the end - so we might as well know where we are?
3527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (UCharOffset + 1 == buffer) {
3528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* we have exhausted the buffer */
3529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    int32_t newsize = 0;
3530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(source->pos) { // actually dealing with a position
353150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        newsize = (int32_t)(source->pos - source->string + 1);
3532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else { // iterator
3533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        newsize = 4 * UCOL_MAX_BUFFER;
3534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    strbuffer = (UChar *)uprv_malloc(sizeof(UChar) *
3536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        (newsize + UCOL_MAX_BUFFER));
3537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* test for NULL */
3538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (strbuffer == NULL) {
3539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_MEMORY_ALLOCATION_ERROR;
3540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_NO_MORE_CES;
3541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UCharOffset = strbuffer + newsize;
3543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uprv_memcpy(UCharOffset, buffer,
3544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_MAX_BUFFER * sizeof(UChar));
3545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UCharOffset --;
3546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if ((source->pos && (source->pos == source->string ||
3548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    ((source->flags & UCOL_ITER_INNORMBUF) &&
3549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->pos - 1) == 0 && source->fcdPosition == NULL)))
3550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    || (source->iterator && !source->iterator->hasPrevious(source->iterator))) {
3551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
3552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* adds the initial base character to the string */
3555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *(UCharOffset) = schar;
3556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            noChars++;
3557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            int32_t offsetBias;
3559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // **** doesn't work if using iterator ****
3561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (source->flags & UCOL_ITER_INNORMBUF) {
3562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                offsetBias = -1;
3563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
3564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                offsetBias = (int32_t)(source->pos - source->string);
3565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* a new collIterate is used to simplify things, since using the current
3568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            collIterate will mean that the forward and backwards iteration will
3569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            share and change the same buffers. we don't want to get into that. */
3570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            collIterate temp;
3571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            int32_t rawOffset;
3572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            IInit_collIterate(coll, UCharOffset, noChars, &temp, status);
357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(U_FAILURE(*status)) {
357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return UCOL_NULLORDER;
357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
3577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            temp.flags &= ~UCOL_ITER_NORM;
3578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            temp.flags |= source->flags & UCOL_FORCE_HAN_IMPLICIT;
3579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
358050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            rawOffset = (int32_t)(temp.pos - temp.string); // should always be zero?
3581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE = ucol_IGetNextCE(coll, &temp, status);
3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (source->extendCEs) {
3584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                endCEBuffer = source->extendCEs + source->extendCEsSize;
358550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                CECount = (int32_t)((source->CEpos - source->extendCEs)/sizeof(uint32_t));
3586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
3587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE;
358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                CECount = (int32_t)((source->CEpos - source->CEs)/sizeof(uint32_t));
3589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while (CE != UCOL_NO_MORE_CES) {
3592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->CEpos ++) = CE;
3593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (offsetBias >= 0) {
359527f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->appendOffset(rawOffset + offsetBias, *status);
3596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CECount++;
3599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->CEpos == endCEBuffer) {
3600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* ran out of CE space, reallocate to new buffer.
3601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    If reallocation fails, reset pointers and bail out,
3602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    there's no guarantee of the right character position after
3603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    this bail*/
360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (!increaseCEsCapacity(source)) {
3605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_MEMORY_ALLOCATION_ERROR;
360627f654740f2a26ad62a5c155af9199af9e69b889claireho                        break;
3607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endCEBuffer = source->extendCEs + source->extendCEsSize;
3610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) {
361350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    rawOffset = (int32_t)(temp.fcdPosition - temp.string);
3614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                } else {
361550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    rawOffset = (int32_t)(temp.pos - temp.string);
3616b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                }
3617b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &temp, status);
3619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
362127f654740f2a26ad62a5c155af9199af9e69b889claireho            if (strbuffer != buffer) {
362227f654740f2a26ad62a5c155af9199af9e69b889claireho                uprv_free(strbuffer);
362327f654740f2a26ad62a5c155af9199af9e69b889claireho            }
362427f654740f2a26ad62a5c155af9199af9e69b889claireho            if (U_FAILURE(*status)) {
362527f654740f2a26ad62a5c155af9199af9e69b889claireho                return (uint32_t)UCOL_NULLORDER;
362627f654740f2a26ad62a5c155af9199af9e69b889claireho            }
362727f654740f2a26ad62a5c155af9199af9e69b889claireho
362827f654740f2a26ad62a5c155af9199af9e69b889claireho            if (source->offsetRepeatValue != 0) {
3629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (CECount > noChars) {
363027f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->offsetRepeatCount += temp.offsetRepeatCount;
3631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // **** does this really skip the right offsets? ****
3633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetReturn -= (noChars - CECount);
3634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (offsetBias >= 0) {
3638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->offsetReturn = source->offsetStore - 1;
3639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->offsetReturn == source->offsetBuffer) {
3640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetStore = source->offsetBuffer;
3641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            source->toReturn = source->CEpos - 1;
3645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (source->toReturn == source->CEs) {
3646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->CEpos = source->CEs;
3647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *(source->toReturn);
365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
3651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case LONG_PRIMARY_TAG:
3652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
3654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
3655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->toReturn = source->CEpos - 1;
3656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
365727f654740f2a26ad62a5c155af9199af9e69b889claireho                if (source->flags & UCOL_ITER_INNORMBUF) {
3658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetRepeatCount = 1;
365927f654740f2a26ad62a5c155af9199af9e69b889claireho                } else {
366027f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t firstOffset = (int32_t)(source->pos - source->string);
3661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
366227f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->appendOffset(firstOffset, *status);
366327f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->appendOffset(firstOffset + 1, *status);
3664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
366527f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->offsetReturn = source->offsetStore - 1;
366627f654740f2a26ad62a5c155af9199af9e69b889claireho                    *(source->offsetBuffer) = firstOffset;
366727f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (source->offsetReturn == source->offsetBuffer) {
366827f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->offsetStore = source->offsetBuffer;
366927f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
367027f654740f2a26ad62a5c155af9199af9e69b889claireho                }
3671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return *(source->toReturn);
3674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case EXPANSION_TAG: /* this tag always returns */
3677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /*
3679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            This should handle expansion.
3680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            NOTE: we can encounter both continuations and expansions in an expansion!
3681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            I have to decide where continuations are going to be dealt with
3682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            */
3683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            int32_t firstOffset = (int32_t)(source->pos - source->string);
3684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // **** doesn't work if using iterator ****
3686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (source->offsetReturn != NULL) {
3687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (! (source->flags & UCOL_ITER_INNORMBUF) && source->offsetReturn == source->offsetBuffer) {
3688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetStore = source->offsetBuffer;
3689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }else {
3690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                  firstOffset = -1;
3691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* find the offset to expansion table */
3695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
3696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            size     = getExpansionCount(CE);
3697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (size != 0) {
3698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
3699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if there are less than 16 elements in expansion, we don't terminate
3700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
3701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t count;
3702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for (count = 0; count < size; count++) {
3704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos ++) = *CEOffset++;
3705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (firstOffset >= 0) {
370727f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->appendOffset(firstOffset + 1, *status);
3708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
3711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* else, we do */
3712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (*CEOffset != 0) {
3713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos ++) = *CEOffset ++;
3714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (firstOffset >= 0) {
371627f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->appendOffset(firstOffset + 1, *status);
3717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (firstOffset >= 0) {
3722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->offsetReturn = source->offsetStore - 1;
3723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *(source->offsetBuffer) = firstOffset;
3724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->offsetReturn == source->offsetBuffer) {
3725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->offsetStore = source->offsetBuffer;
3726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
3727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
3728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->offsetRepeatCount += size - 1;
3729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            source->toReturn = source->CEpos - 1;
3732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // in case of one element expansion, we
3733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // want to immediately return CEpos
3734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(source->toReturn == source->CEs) {
3735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                source->CEpos = source->CEs;
3736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *(source->toReturn);
3739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case DIGIT_TAG:
3742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
3744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                We do a check to see if we want to collate digits as numbers; if so we generate
3745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                a custom collation key. Otherwise we pull out the value stored in the expansion table.
3746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
3747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t i;    /* general counter */
3748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (source->coll->numericCollation == UCOL_ON){
3750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t digIndx = 0;
3751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t endIndex = 0;
3752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t leadingZeroIndex = 0;
3753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t trailingZeroCount = 0;
3754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint8_t collateVal = 0;
3756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UBool nonZeroValReached = FALSE;
3758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2]; // I just need a temporary place to store my generated CEs.
3760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    We parse the source string until we hit a char that's NOT a digit.
3762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    Use this u_charDigitValue. This might be slow because we have to
3763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    handle surrogates...
3764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3765b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    /*
3766b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    We need to break up the digit string into collation elements of UCOL_MAX_DIGITS_FOR_NUMBER or less,
3767b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    with any chunks smaller than that being on the right end of the digit string - i.e. the first collation
3768b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    element we process when going backward. To determine how long that chunk might be, we may need to make
3769b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    two passes through the loop that collects digits - one to see how long the string is (and how much is
3770b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    leading zeros) to determine the length of that right-hand chunk, and a second (if the whole string has
3771b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    more than UCOL_MAX_DIGITS_FOR_NUMBER non-leading-zero digits) to actually process that collation
3772b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    element chunk after resetting the state to the initialState at the right side of the digit string.
3773b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    */
3774b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    uint32_t ceLimit = 0;
3775b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    UChar initial_ch = ch;
3776b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    collIterateState initialState = {0,0,0,0,0,0,0,0,0};
3777b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    backupState(source, &initialState);
3778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3779b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    for(;;) {
3780b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        collIterateState state = {0,0,0,0,0,0,0,0,0};
3781b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        UChar32 char32 = 0;
3782b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        int32_t digVal = 0;
3783b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3784b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        if (U16_IS_TRAIL (ch)) {
3785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if (!collIter_bos(source)){
3786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                UChar lead = getPrevNormalizedChar(source, status);
3787b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                if(U16_IS_LEAD(lead)) {
3788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    char32 = U16_GET_SUPPLEMENTARY(lead,ch);
3789b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    goBackOne(source);
3790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                } else {
3791b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    char32 = ch;
3792b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                }
3793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
3794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                char32 = ch;
3795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
3796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
3797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            char32 = ch;
3798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
3799b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        digVal = u_charDigitValue(char32);
3800b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3801b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        for(;;) {
3802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // Make sure we have enough space. No longer needed;
3803b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // at this point the largest value of digIndx when we need to save data in numTempBuf
3804b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // is UCOL_MAX_DIGITS_FOR_NUMBER-1 (digIndx is post-incremented) so we just ensure
3805b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // that numTempBuf is big enough (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2).
3806b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3807b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // Skip over trailing zeroes, and keep a count of them.
3808b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if (digVal != 0)
3809b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                nonZeroValReached = TRUE;
3810b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if (nonZeroValReached) {
3812b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                /*
3813b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                We parse the digit string into base 100 numbers (this fits into a byte).
3814b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                We only add to the buffer in twos, thus if we are parsing an odd character,
3815b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                that serves as the 'tens' digit, while if we are parsing an even one, that
3816b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
3817b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
3818b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
3819b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                than all the other bytes.
3820b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3821b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                Since we're doing this in reverse, we want to put the first digit encountered into the
3822b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                ones place and the second digit encountered into the tens place.
3823b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                */
3824b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3825b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                if ((digIndx + trailingZeroCount) % 2 == 1) {
3826b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // High-order digit case (tens place)
3827b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    collateVal += (uint8_t)(digVal * 10);
3828b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3829b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // We cannot set leadingZeroIndex unless it has been set for the
3830b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // low-order digit. Therefore, all we can do for the high-order
3831b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // digit is turn it off, never on.
3832b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // The only time we will have a high digit without a low is for
3833b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // the very first non-zero digit, so no zero check is necessary.
3834b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if (collateVal != 0)
3835b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        leadingZeroIndex = 0;
3836b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3837b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // The first pass through, digIndx may exceed the limit, but in that case
3838b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // we no longer care about numTempBuf contents since they will be discarded
3839b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) {
3840b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
3841b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    }
3842b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    collateVal = 0;
3843b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                } else {
3844b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // Low-order digit case (ones place)
3845b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    collateVal = (uint8_t)digVal;
3846b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3847b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // Check for leading zeroes.
3848b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if (collateVal == 0) {
3849b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        if (!leadingZeroIndex)
3850b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                            leadingZeroIndex = (digIndx/2) + 2;
3851b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    } else
3852b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        leadingZeroIndex = 0;
3853b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3854b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // No need to write to buffer; the case of a last odd digit
3855b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // is handled below.
3856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3857b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                ++digIndx;
3858b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            } else
3859b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                ++trailingZeroCount;
3860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3861b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            if (!collIter_bos(source)) {
3862b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                ch = getPrevNormalizedChar(source, status);
3863b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                //goBackOne(source);
3864b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                if (U16_IS_TRAIL(ch)) {
3865b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    backupState(source, &state);
3866b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if (!collIter_bos(source)) {
3867b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        goBackOne(source);
3868b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        UChar lead = getPrevNormalizedChar(source, status);
3869b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3870b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        if(U16_IS_LEAD(lead)) {
3871b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                            char32 = U16_GET_SUPPLEMENTARY(lead,ch);
3872b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        } else {
3873b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                            loadState(source, &state, FALSE);
3874b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                            char32 = ch;
3875b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                        }
3876b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    }
3877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else
3878b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    char32 = ch;
3879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3880b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                if ((digVal = u_charDigitValue(char32)) == -1 || (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) {
3881b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    if (char32 > 0xFFFF) {// For surrogates.
3882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        loadState(source, &state, FALSE);
3883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
3884b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // Don't need to "reverse" the goBackOne call,
3885b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    // as this points to the next position to process..
3886b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    //if (char32 > 0xFFFF) // For surrogates.
3887b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    //getNextNormalizedChar(source);
3888b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                    break;
3889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
3890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3891b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                goBackOne(source);
3892b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            }else
3893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                break;
3894b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        }
3895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3896b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_NUMBER) {
3897b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            // our collation element is not too big, go ahead and finish with it
3898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            break;
3899b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        }
3900b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // our digit string is too long for a collation element;
3901b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        // set the limit for it, reset the state and begin again
3902b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGITS_FOR_NUMBER;
3903b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        if ( ceLimit == 0 ) {
3904b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                            ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER;
3905b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        }
3906b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        ch = initial_ch;
3907b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        loadState(source, &initialState, FALSE);
3908b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        digIndx = endIndex = leadingZeroIndex = trailingZeroCount = 0;
3909b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        collateVal = 0;
3910b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        nonZeroValReached = FALSE;
3911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (! nonZeroValReached) {
3914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        digIndx = 2;
3915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        trailingZeroCount = 0;
3916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        numTempBuf[2] = 6;
3917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if ((digIndx + trailingZeroCount) % 2 != 0) {
3920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
3921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        digIndx += 1;       // The implicit leading zero
3922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (trailingZeroCount % 2 != 0) {
3924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // We had to consume one trailing zero for the low digit
3925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // of the least significant byte
3926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        digIndx += 1;       // The trailing zero not in the exponent
3927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        trailingZeroCount -= 1;
3928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;
3931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Subtract one off of the last byte. Really the first byte here, but it's reversed...
3933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[2] -= 1;
3934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /*
3936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    We want to skip over the first two slots in the buffer. The first slot
3937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
3938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sign/exponent byte: 0x80 + (exponent & 0x7F).
3939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    The exponent must be adjusted by the number of leading zeroes, and the number of
3940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    trailing zeroes.
3941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    */
3942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
3943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    uint32_t exponent = (digIndx+trailingZeroCount)/2;
3944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (leadingZeroIndex)
3945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
3946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F));
3947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Now transfer the collation key to our collIterate struct.
394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The total size for our collation key is half of endIndex, rounded up.
395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t size = (endIndex+1)/2;
395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(!ensureCEsCapacity(source, size)) {
395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return UCOL_NULLORDER;
395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
3954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
3955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
3956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_BYTE_COMMON; // Tertiary weight.
3957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    i = endIndex - 1; // Reset the index into the buffer.
3958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(i >= 2) {
3959b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                        uint32_t primWeight = numTempBuf[i--] << 8;
3960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if ( i >= 2)
3961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            primWeight |= numTempBuf[i--];
3962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
3963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
3964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->toReturn = source->CEpos -1;
3966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return *(source->toReturn);
3967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
3968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
3969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = *(CEOffset++);
3970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
3973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
3975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
3976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t
3977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
3978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t LCount = 19;
3979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t VCount = 21;
3980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                static const uint32_t TCount = 28;
3981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t NCount = VCount * TCount;   /* 588 */
3982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //const uint32_t SCount = LCount * NCount;   /* 11172 */
3983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t L = ch - SBase;
3985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
3986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                divide into pieces.
3987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                we do it in this order since some compilers can do % and / in one
3988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                operation
3989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
3990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t T = L % TCount;
3991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L /= TCount;
3992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint32_t V = L % VCount;
3993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L /= VCount;
3994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
3995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* offset them */
3996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                L += LBase;
3997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                V += VBase;
3998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                T += TBase;
3999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
400027f654740f2a26ad62a5c155af9199af9e69b889claireho                int32_t firstOffset = (int32_t)(source->pos - source->string);
400127f654740f2a26ad62a5c155af9199af9e69b889claireho                source->appendOffset(firstOffset, *status);
4002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
4004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 * return the first CE, but first put the rest into the expansion buffer
4005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 */
4006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (!source->coll->image->jamoSpecial) {
4007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
4008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
400927f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->appendOffset(firstOffset + 1, *status);
4010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
401127f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (T != TBase) {
4012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
401327f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->appendOffset(firstOffset + 1, *status);
401427f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
4015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->toReturn = source->CEpos - 1;
4017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
401827f654740f2a26ad62a5c155af9199af9e69b889claireho                    source->offsetReturn = source->offsetStore - 1;
401927f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (source->offsetReturn == source->offsetBuffer) {
402027f654740f2a26ad62a5c155af9199af9e69b889claireho                        source->offsetStore = source->offsetBuffer;
402127f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
402227f654740f2a26ad62a5c155af9199af9e69b889claireho
402327f654740f2a26ad62a5c155af9199af9e69b889claireho                    return *(source->toReturn);
4024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Since Hanguls pass the FCD check, it is
4026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // guaranteed that we won't be in
4027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // the normalization buffer if something like this happens
4028b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Move Jamos into normalization buffer
403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar *tempbuffer = source->writableBuffer.getBuffer(5);
4031b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    int32_t tempbufferLength, jamoOffset;
403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    tempbuffer[0] = 0;
403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    tempbuffer[1] = (UChar)L;
403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    tempbuffer[2] = (UChar)V;
4035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (T != TBase) {
403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        tempbuffer[3] = (UChar)T;
403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        tempbufferLength = 4;
4038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        tempbufferLength = 3;
4040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    source->writableBuffer.releaseBuffer(tempbufferLength);
4042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4043b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // Indicate where to continue in main input string after exhausting the writableBuffer
4044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (source->pos  == source->string) {
4045b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        jamoOffset = 0;
4046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->fcdPosition = NULL;
4047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
4048b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        jamoOffset = source->pos - source->string;
4049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        source->fcdPosition       = source->pos-1;
4050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // Append offsets for the additional chars
4053b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // (not the 0, and not the L whose offsets match the original Hangul)
4054b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    int32_t jamoRemaining = tempbufferLength - 2;
4055b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    jamoOffset++; // appended offsets should match end of original Hangul
4056b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    while (jamoRemaining-- > 0) {
4057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        source->appendOffset(jamoOffset, *status);
4058b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
4059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4060b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    source->offsetRepeatValue = jamoOffset;
4061b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    source->offsetReturn = source->offsetStore - 1;
4063b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (source->offsetReturn == source->offsetBuffer) {
4064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        source->offsetStore = source->offsetBuffer;
4065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
4066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    source->pos               = source->writableBuffer.getTerminatedBuffer() + tempbufferLength;
4068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->origFlags         = source->flags;
4069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->flags            |= UCOL_ITER_INNORMBUF;
4070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->flags            &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
4071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return(UCOL_IGNORABLE);
4073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case IMPLICIT_TAG:        /* everything that is not defined otherwise */
4077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return getPrevImplicit(ch, source);
4078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // TODO: Remove CJK implicits as they are handled by the getImplicitPrimary function
4080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
4081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return getPrevImplicit(ch, source);
4082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case SURROGATE_TAG:  /* This is a surrogate pair */
408427f654740f2a26ad62a5c155af9199af9e69b889claireho            /* essentially an engaged lead surrogate. */
4085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* if you have encountered it here, it means that a */
4086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* broken sequence was encountered and this is an error */
408727f654740f2a26ad62a5c155af9199af9e69b889claireho            return UCOL_NOT_FOUND;
4088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case LEAD_SURROGATE_TAG:  /* D800-DBFF*/
409027f654740f2a26ad62a5c155af9199af9e69b889claireho            return UCOL_NOT_FOUND; /* broken surrogate sequence */
4091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
4093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            {
4094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar32 cp = 0;
4095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UChar  prevChar;
409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const UChar *prev;
4097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (isAtStartPrevIterate(source)) {
4098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* we are at the start of the string, wrong place to be at */
409927f654740f2a26ad62a5c155af9199af9e69b889claireho                    return UCOL_NOT_FOUND;
4100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
410150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (source->pos != source->writableBuffer.getBuffer()) {
4102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    prev     = source->pos - 1;
4103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    prev     = source->fcdPosition;
4105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                prevChar = *prev;
4107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* Handles Han and Supplementary characters here.*/
4109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (U16_IS_LEAD(prevChar)) {
4110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
4111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    source->pos = prev;
4112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
411327f654740f2a26ad62a5c155af9199af9e69b889claireho                    return UCOL_NOT_FOUND; /* like unassigned */
4114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return getPrevImplicit(cp, source);
4117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* UCA is filled with these. Tailorings are NOT_FOUND */
4120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* not yet implemented */
4121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case CHARSET_TAG:  /* this tag always returns */
4122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* probably after 1.8 */
4123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return UCOL_NOT_FOUND;
4124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        default:           /* this tag always returns */
4126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_INTERNAL_PROGRAM_ERROR;
4127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE=0;
4128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
4129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (CE <= UCOL_NOT_FOUND) {
4132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
4133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return CE;
4137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This should really be a macro                                                                      */
4140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This function is used to reverse parts of a buffer. We need this operation when doing continuation */
4141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* secondaries in French                                                                              */
4142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
4143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) {
4144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint8_t temp;
4145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  while(start<end) {
4146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    temp = *start;
4147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *start++ = *end;
4148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *end-- = temp;
4149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
4150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
4152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Reverses, in place, the elements between two pointers (both inclusive).
 * TYPE  - element type, used for the swap temporary
 * start - pointer lvalue to the first element; advanced by the macro
 * end   - pointer lvalue to the last element; moved backwards by the macro
 * Both pointer arguments are modified and evaluated several times, so they
 * must be plain variables without side effects.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define uprv_ucol_reverse_buffer(TYPE, start, end) { \
    TYPE tempA; \
    for (; (start) < (end); ++(start), --(end)) { \
        tempA = *(start); \
        *(start) = *(end); \
        *(end) = tempA; \
    } \
}
4161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
4163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the sortkey generation functions                           */
4164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                          */
4165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
4166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
4168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Merge two sort keys.
4169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is useful, for example, to combine sort keys from first and last names
4170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to sort such pairs.
4171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Merged sort keys consider on each collation level the first part first entirely,
4172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then the second one.
4173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is possible to merge multiple sort keys by consecutively merging
4174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * another one with the intermediate result.
4175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
4176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The length of the merge result is the sum of the lengths of the input sort keys
4177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * minus 1.
4178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
4179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src1 the first sort key
4180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src1Length the length of the first sort key, including the zero byte at the end;
4181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        can be -1 if the function is to find the length
4182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src2 the second sort key
4183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param src2Length the length of the second sort key, including the zero byte at the end;
4184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        can be -1 if the function is to find the length
4185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest the buffer where the merged sort key is written,
4186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        can be NULL if destCapacity==0
4187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param destCapacity the number of bytes in the dest buffer
4188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the length of the merged sort key, src1Length+src2Length-1;
4189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
4190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *         in which cases the contents of dest is undefined
4191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
4192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @draft
4193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
4194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
4195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
4196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   const uint8_t *src2, int32_t src2Length,
4197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   uint8_t *dest, int32_t destCapacity) {
4198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t destLength;
4199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t b;
4200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* check arguments */
4202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( src1==NULL || src1Length<-2 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) ||
4203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src2==NULL || src2Length<-2 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) ||
4204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        destCapacity<0 || (destCapacity>0 && dest==NULL)
4205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
4206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* error, attempt to write a zero byte and return 0 */
4207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(dest!=NULL && destCapacity>0) {
4208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest=0;
4209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
4211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* check lengths and capacity */
4214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(src1Length<0) {
4215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
4216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(src2Length<0) {
4218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
4219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    destLength=src1Length+src2Length-1;
4222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(destLength>destCapacity) {
4223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* the merged sort key does not fit into the destination */
4224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return destLength;
4225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* merge the sort keys with the same number of levels */
4228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(*src1!=0 && *src2!=0) { /* while both have another level */
4229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* copy level from src1 not including 00 or 01 */
4230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((b=*src1)>=2) {
4231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++src1;
4232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=b;
4233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* add a 02 merge separator */
4236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *dest++=2;
4237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* copy level from src2 not including 00 or 01 */
4239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((b=*src2)>=2) {
4240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++src2;
4241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=b;
4242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* if both sort keys have another level, then add a 01 level separator and continue */
4245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(*src1==1 && *src2==1) {
4246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++src1;
4247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++src2;
4248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=1;
4249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
4253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * here, at least one sort key is finished now, but the other one
4254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * might have some contents left from containing more levels;
4255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * that contents is just appended to the result
4256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
4257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*src1!=0) {
4258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* src1 is not finished, therefore *src2==0, and src1 is appended */
4259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src2=src1;
4260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* append src2, "the other, unfinished sort key" */
4262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_strcpy((char *)dest, (const char *)src2);
4263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* trust that neither sort key contained illegally embedded zero bytes */
4265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return destLength;
4266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
4269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass SortKeyByteSink : public ByteSink {
4271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic:
4272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const uint32_t FILL_ORIGINAL_BUFFER = 1;
4273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const uint32_t DONT_GROW = 2;
4274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink(char *dest, int32_t destCapacity, uint32_t flags=0)
4275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            : ownedBuffer_(NULL), buffer_(dest), capacity_(destCapacity),
4276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              appended_(0),
4277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              fill_(flags & FILL_ORIGINAL_BUFFER),
4278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              grow_((flags & DONT_GROW) == 0) {
4279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (buffer_ == NULL || capacity_ < 0) {
4280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            buffer_ = reinterpret_cast<char *>(&lastResortByte_);
4281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            capacity_ = 0;
4282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual ~SortKeyByteSink() { uprv_free(ownedBuffer_); }
4285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual void Append(const char *bytes, int32_t n);
4287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void Append(const uint8_t *bytes, int32_t n) { Append(reinterpret_cast<const char *>(bytes), n); }
4288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void Append(uint8_t b) {
4289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (appended_ < capacity_) {
4290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            buffer_[appended_++] = (char)b;
4291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
4292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Append(&b, 1);
4293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void Append(uint8_t b1, uint8_t b2) {
4296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t a2 = appended_ + 2;
4297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (a2 <= capacity_) {
4298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            buffer_[appended_] = (char)b1;
4299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            buffer_[appended_ + 1] = (char)b2;
4300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            appended_ = a2;
4301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
4302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            char bytes[2] = { (char)b1, (char)b2 };
4303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Append(bytes, 2);
4304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void Append(const SortKeyByteSink &other) { Append(other.buffer_, other.appended_); }
4307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual char *GetAppendBuffer(int32_t min_capacity,
4308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                  int32_t desired_capacity_hint,
4309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                  char *scratch, int32_t scratch_capacity,
4310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                  int32_t *result_capacity);
4311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t NumberOfBytesAppended() const { return appended_; }
4312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uint8_t &LastByte() {
4313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (buffer_ != NULL && appended_ > 0) {
4314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return reinterpret_cast<uint8_t *>(buffer_)[appended_ - 1];
4315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
4316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return lastResortByte_;
4317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uint8_t *GetLastFewBytes(int32_t n) {
4320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (buffer_ != NULL && appended_ >= n) {
4321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return reinterpret_cast<uint8_t *>(buffer_) + appended_ - n;
4322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
4323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return NULL;
4324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char *GetBuffer() { return buffer_; }
4327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uint8_t *GetUnsignedBuffer() { return reinterpret_cast<uint8_t *>(buffer_); }
4328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uint8_t *OrphanUnsignedBuffer(int32_t &orphanedCapacity);
4329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UBool IsOk() const { return buffer_ != NULL; }  // otherwise out-of-memory
4330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate:
4332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
4333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
4334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UBool Resize(int32_t appendCapacity, int32_t length);
4336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void SetNotOk() {
4337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        buffer_ = NULL;
4338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        capacity_ = 0;
4339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static uint8_t lastResortByte_;  // last-resort return value from LastByte()
4342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char *ownedBuffer_;
4344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char *buffer_;
4345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t capacity_;
4346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t appended_;
4347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UBool fill_;
4348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UBool grow_;
4349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho};
4350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehouint8_t SortKeyByteSink::lastResortByte_ = 0;
4352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid
4354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoSortKeyByteSink::Append(const char *bytes, int32_t n) {
4355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (n <= 0) {
4356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
4357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length = appended_;
4359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    appended_ += n;
4360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if ((buffer_ + length) == bytes) {
4361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;  // the caller used GetAppendBuffer() and wrote the bytes already
4362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (buffer_ == NULL) {
4364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;  // allocation failed before already
4365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t available = capacity_ - length;
4367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (bytes == NULL) {
4368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // assume that the caller failed to allocate memory
4369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (fill_) {
4370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (n > available) {
4371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                n = available;
4372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
4373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            uprv_memset(buffer_, 0, n);
4374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        SetNotOk();  // propagate the out-of-memory error
4376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
4377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (n > available) {
4379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (fill_ && available > 0) {
4380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Fill the original buffer completely.
4381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            uprv_memcpy(buffer_ + length, bytes, available);
4382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            bytes += available;
4383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length += available;
4384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            n -= available;
4385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            available = 0;
4386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        fill_ = FALSE;
4388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (!Resize(n, length)) {
4389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            SetNotOk();
4390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return;
4391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uprv_memcpy(buffer_ + length, bytes, n);
4394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
4395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehochar *
4397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoSortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
4398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                 int32_t desired_capacity_hint,
4399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                 char *scratch,
4400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                 int32_t scratch_capacity,
4401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                 int32_t *result_capacity) {
4402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (min_capacity < 1 || scratch_capacity < min_capacity) {
4403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *result_capacity = 0;
4404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
4405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t available = capacity_ - appended_;
4407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (available >= min_capacity) {
4408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *result_capacity = available;
4409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return buffer_ + appended_;
4410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else if (Resize(desired_capacity_hint, appended_)) {
4411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *result_capacity = capacity_ - appended_;
4412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return buffer_ + appended_;
4413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
4414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *result_capacity = scratch_capacity;
4415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return scratch;
4416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4417b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
4418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4419b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool
4420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoSortKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
4421b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (!grow_) {
4422b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
4423b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4424b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t newCapacity = 2 * capacity_;
4425b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t altCapacity = length + 2 * appendCapacity;
4426b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (newCapacity < altCapacity) {
4427b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        newCapacity = altCapacity;
4428b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4429b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (newCapacity < 1024) {
4430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        newCapacity = 1024;
4431b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4432b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char *newBuffer = (char *)uprv_malloc(newCapacity);
4433b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (newBuffer == NULL) {
4434b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
4435b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4436b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uprv_memcpy(newBuffer, buffer_, length);
4437b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uprv_free(ownedBuffer_);
4438b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ownedBuffer_ = buffer_ = newBuffer;
4439b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    capacity_ = newCapacity;
4440b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return TRUE;
4441b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
4442b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4443b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehouint8_t *
4444b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoSortKeyByteSink::OrphanUnsignedBuffer(int32_t &orphanedCapacity) {
4445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (buffer_ == NULL || appended_ == 0) {
4446b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        orphanedCapacity = 0;
4447b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
4448b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4449b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (ownedBuffer_ != NULL) {
4450b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // orphan & forget the ownedBuffer_
4451b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uint8_t *returnBuffer = reinterpret_cast<uint8_t *>(ownedBuffer_);
4452b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ownedBuffer_ = buffer_ = NULL;
4453b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        orphanedCapacity = capacity_;
4454b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        capacity_ = appended_ = 0;
4455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return returnBuffer;
4456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4457b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // clone the buffer_
4458b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uint8_t *newBuffer = (uint8_t *)uprv_malloc(appended_);
4459b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (newBuffer == NULL) {
4460b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        orphanedCapacity = 0;
4461b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
4462b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4463b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uprv_memcpy(newBuffer, buffer_, appended_);
4464b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    orphanedCapacity = appended_;
4465b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return newBuffer;
4466b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
4467b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
4469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* sortkey API */
4471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
4472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getSortKey(const    UCollator    *coll,
4473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar        *source,
4474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t        sourceLength,
4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint8_t        *result,
4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t        resultLength)
4477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
4478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
4479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
4480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
4481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength));
4482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
4485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t keySize   = 0;
4486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(source != NULL) {
4488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // source == NULL is actually an error situation, but we would need to
4489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // have an error code to return it. Until we introduce a new
4490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // API, it stays like this
4491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* this uses the function pointer that is set in updateinternalstate */
4493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* currently, there are two funcs: */
4494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*ucol_calcSortKey(...);*/
4495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /*ucol_calcSortKeySimpleTertiary(...);*/
4496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4497b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        SortKeyByteSink sink(reinterpret_cast<char *>(result), resultLength,
4498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                             SortKeyByteSink::FILL_ORIGINAL_BUFFER | SortKeyByteSink::DONT_GROW);
4499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        coll->sortKeyGen(coll, source, sourceLength, sink, &status);
4500b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        keySize = sink.NumberOfBytesAppended();
4501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
4503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_EXIT_STATUS(status);
4504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return keySize;
4505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* this function is called by the C++ API for sortkey generation */
4508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
4509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getSortKeyWithAllocation(const UCollator *coll,
4510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const UChar *source, int32_t sourceLength,
4511b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                              uint8_t *&result, int32_t &resultCapacity,
4512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              UErrorCode *pErrorCode) {
4513b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink sink(reinterpret_cast<char *>(result), resultCapacity);
4514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    coll->sortKeyGen(coll, source, sourceLength, sink, pErrorCode);
4515b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t resultLen = sink.NumberOfBytesAppended();
4516b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_SUCCESS(*pErrorCode)) {
4517b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (!sink.IsOk()) {
4518b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
4519b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if (result != sink.GetUnsignedBuffer()) {
4520b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result = sink.OrphanUnsignedBuffer(resultCapacity);
4521b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4522b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
4523b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return resultLen;
4524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
452627f654740f2a26ad62a5c155af9199af9e69b889claireho// Is this primary weight compressible?
452727f654740f2a26ad62a5c155af9199af9e69b889claireho// Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit).
452827f654740f2a26ad62a5c155af9199af9e69b889claireho// TODO: This should use per-lead-byte flags from FractionalUCA.txt.
452927f654740f2a26ad62a5c155af9199af9e69b889clairehostatic inline UBool
453027f654740f2a26ad62a5c155af9199af9e69b889clairehoisCompressible(const UCollator * /*coll*/, uint8_t primary1) {
453127f654740f2a26ad62a5c155af9199af9e69b889claireho    return UCOL_BYTE_FIRST_NON_LATIN_PRIMARY <= primary1 && primary1 <= maxRegularPrimary;
453227f654740f2a26ad62a5c155af9199af9e69b889claireho}
453327f654740f2a26ad62a5c155af9199af9e69b889claireho
4534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
4535b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoinline void doCaseShift(SortKeyByteSink &cases, uint32_t &caseShift) {
4536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (caseShift  == 0) {
4537b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        cases.Append(UCOL_CASE_BYTE_START);
4538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        caseShift = UCOL_CASE_SHIFT_START;
4539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4542b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Packs the secondary buffer when processing French locale.
4543b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void
4544b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopackFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) {
4545b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    secondaries += secsize;  // We read the secondary-level bytes back to front.
4546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t secondary;
4547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t count2 = 0;
4548b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t i = 0;
4549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // we use i here since the key size already accounts for terminators, so we'll discard the increment
4550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(i = 0; i<secsize; i++) {
4551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secondary = *(secondaries-i-1);
4552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* This is compression code. */
4553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (secondary == UCOL_COMMON2) {
4554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            ++count2;
4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
4556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (count2 > 0) {
4557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
4558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count2 > UCOL_TOP_COUNT2) {
4559b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        result.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
4560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count2 -= (uint32_t)UCOL_TOP_COUNT2;
4561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4562b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    result.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
4563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count2 > UCOL_BOT_COUNT2) {
4565b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
4566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count2 -= (uint32_t)UCOL_BOT_COUNT2;
4567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4568b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
4569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                count2 = 0;
4571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result.Append(secondary);
4573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (count2 > 0) {
4576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while (count2 > UCOL_BOT_COUNT2) {
4577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
4578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count2 -= (uint32_t)UCOL_BOT_COUNT2;
4579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4580b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
4581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0
4585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the sortkey work horse function */
4587b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CFUNC void U_CALLCONV
4588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_calcSortKey(const    UCollator    *coll,
4589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar        *source,
4590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t        sourceLength,
4591b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        SortKeyByteSink &result,
4592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode *status)
4593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
4594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status)) {
4595b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
4596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4598b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /* Stack allocated buffers for buffers we use */
4599b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER];
4600b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BUFFER];
4601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4602b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink &primaries = result;
4603b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink secondaries(second, LENGTHOF(second));
4604b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink tertiaries(tert, LENGTHOF(tert));
4605b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink cases(caseB, LENGTHOF(caseB));
4606b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink quads(quad, LENGTHOF(quad));
4607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
460850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString normSource;
4609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength);
4611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UColAttributeValue strength = coll->strength;
4613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareSec   = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
4615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareTer   = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
4616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t compareQuad  = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
4617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  compareIdent = (strength == UCOL_IDENTICAL);
4618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  doCase = (coll->caseLevel == UCOL_ON);
4619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0);
4620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  shifted = (coll->alternateHandling == UCOL_SHIFTED);
4621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UBool  qShifted = shifted && (compareQuad == 0);
4622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool  doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
4623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t variableTopValue = coll->variableTopValue;
4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no
4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // qShifted, we don't need to set UCOL_COMMON_BOT4 so high.
4627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
4628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_HIRAGANA_QUAD = 0;
4629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(doHiragana) {
4630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++;
4631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* allocate one more space for hiragana, value for hiragana */
4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);
4634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* support for special features like caselevel and funky secondaries */
4636b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t lastSecondaryLength = 0;
4637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t caseShift = 0;
4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* If we need to normalize, we'll do it all at once at the beginning! */
464050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const Normalizer2 *norm2;
4641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(compareIdent) {
464250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm2 = Normalizer2Factory::getNFDInstance(*status);
4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(coll->normalizationMode != UCOL_OFF) {
464450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm2 = Normalizer2Factory::getFCDInstance(*status);
4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
464650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm2 = NULL;
464750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
464850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(norm2 != NULL) {
464950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        normSource.setTo(FALSE, source, len);
465050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);
465150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(qcYesLength != len) {
465250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeString unnormalized = normSource.tempSubString(qcYesLength);
465350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            normSource.truncate(qcYesLength);
465450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);
465550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            source = normSource.getBuffer();
465650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len = normSource.length();
4657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collIterate s;
466050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, source, len, &s, status);
466150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
4662b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
466350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
466427f654740f2a26ad62a5c155af9199af9e69b889claireho    s.flags &= ~UCOL_ITER_NORM;  // source passed the FCD test or else was normalized.
4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t order = 0;
4667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary1 = 0;
4669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary2 = 0;
4670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t secondary = 0;
4671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiary = 0;
4672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseSwitch = coll->caseSwitch;
4673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryMask = coll->tertiaryMask;
4674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int8_t tertiaryAddition = coll->tertiaryAddition;
4675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryTop = coll->tertiaryTop;
4676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryBottom = coll->tertiaryBottom;
4677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryCommon = coll->tertiaryCommon;
4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseBits = 0;
4679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool wasShifted = FALSE;
4681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool notIsContinuation = FALSE;
4682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t count2 = 0, count3 = 0, count4 = 0;
4684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t leadPrimary = 0;
4685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(;;) {
4687b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        order = ucol_IGetNextCE(coll, &s, status);
4688b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(order == UCOL_NO_MORE_CES) {
4689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
4690b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4692b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(order == 0) {
4693b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            continue;
4694b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4696b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        notIsContinuation = !isContinuation(order);
4697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4698b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(notIsContinuation) {
4699b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK);
4700b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
4701b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
4702b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4704b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
4705b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
4706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        primary1 = (uint8_t)(order >> 8);
4707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4708b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uint8_t originalPrimary1 = primary1;
4709b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(notIsContinuation && coll->leadBytePermutationTable != NULL) {
4710b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            primary1 = coll->leadBytePermutationTable[primary1];
4711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
4712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4713b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
4714b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        || (!notIsContinuation && wasShifted)))
4715b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            || (wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */
4716b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        {
4717b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /* and other ignorables should be removed if following a shifted code point */
4718b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */
4719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /* we should just completely ignore it */
4720b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                continue;
472127f654740f2a26ad62a5c155af9199af9e69b889claireho            }
4722b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(compareQuad == 0) {
4723b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(count4 > 0) {
4724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    while (count4 > UCOL_BOT_COUNT4) {
4725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
4726b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        count4 -= UCOL_BOT_COUNT4;
4727b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
4728b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
4729b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    count4 = 0;
4730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4731b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /* We are dealing with a variable and we're treating them as shifted */
4732b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /* This is a shifted ignorable */
4733b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(primary1 != 0) { /* we need to check this since we could be in continuation */
4734b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    quads.Append(primary1);
4735b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
4736b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(primary2 != 0) {
4737b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    quads.Append(primary2);
4738b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
4739b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
4740b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            wasShifted = TRUE;
4741b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
4742b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            wasShifted = FALSE;
4743b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
4744b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will   */
4745b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /* regular and simple sortkey calc */
4746b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(primary1 != UCOL_IGNORABLE) {
4747b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(notIsContinuation) {
4748b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(leadPrimary == primary1) {
4749b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        primaries.Append(primary2);
4750b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
4751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if(leadPrimary != 0) {
4752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            primaries.Append((uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN));
4753b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
4754b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if(primary2 == UCOL_IGNORABLE) {
4755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            /* one byter, not compressed */
4756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            primaries.Append(primary1);
4757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            leadPrimary = 0;
4758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        } else if(isCompressible(coll, originalPrimary1)) {
4759b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            /* compress */
4760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            primaries.Append(leadPrimary = primary1, primary2);
4761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        } else {
4762b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            leadPrimary = 0;
4763b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            primaries.Append(primary1, primary2);
4764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
4767b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(primary2 == UCOL_IGNORABLE) {
4768b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        primaries.Append(primary1);
4769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
4770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        primaries.Append(primary1, primary2);
4771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4773b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
4774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
4775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(secondary > compareSec) {
4776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(!isFrenchSec) {
4777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    /* This is compression code. */
4778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (secondary == UCOL_COMMON2 && notIsContinuation) {
4779b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        ++count2;
4780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
4781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if (count2 > 0) {
4782b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
4783b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                while (count2 > UCOL_TOP_COUNT2) {
4784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                    secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
4785b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                    count2 -= (uint32_t)UCOL_TOP_COUNT2;
4786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
4787b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
478827f654740f2a26ad62a5c155af9199af9e69b889claireho                            } else {
4789b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                while (count2 > UCOL_BOT_COUNT2) {
4790b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                    secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
4791b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                    count2 -= (uint32_t)UCOL_BOT_COUNT2;
479227f654740f2a26ad62a5c155af9199af9e69b889claireho                                }
4793b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
4794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4795b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            count2 = 0;
4796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4797b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        secondaries.Append(secondary);
4798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4799b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
4800b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    /* Do the special handling for French secondaries */
4801b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    /* We need to get continuation elements and do intermediate restore */
4802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    /* abc1c2c3de with french secondaries need to be edc1c2c3ba NOT edc3c2c1ba */
4803b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(notIsContinuation) {
4804b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if (lastSecondaryLength > 1) {
4805b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength);
4806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (frenchStartPtr != NULL) {
4807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                /* reverse secondaries from frenchStartPtr up to frenchEndPtr */
4808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1;
4809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
4810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4812b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        lastSecondaryLength = 1;
4813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
4814b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        ++lastSecondaryLength;
4815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4816b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    secondaries.Append(secondary);
4817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
4819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) {
4821b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // do the case level if we need to do it. We don't want to calculate
4822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // case level for primary ignorables if we have only primary strength and case level
4823b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // otherwise we would break well formedness of CEs
4824b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                doCaseShift(cases, caseShift);
4825b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(notIsContinuation) {
4826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    caseBits = (uint8_t)(tertiary & 0xC0);
4827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(tertiary != 0) {
4829b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if(coll->caseFirst == UCOL_UPPER_FIRST) {
4830b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            if((caseBits & 0xC0) == 0) {
4831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                cases.LastByte() |= 1 << (--caseShift);
4832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
4833b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                cases.LastByte() |= 0 << (--caseShift);
4834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                /* second bit */
4835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                doCaseShift(cases, caseShift);
4836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                cases.LastByte() |= ((caseBits>>6)&1) << (--caseShift);
4837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        } else {
4839b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            if((caseBits & 0xC0) == 0) {
4840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                cases.LastByte() |= 0 << (--caseShift);
4841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
4842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                cases.LastByte() |= 1 << (--caseShift);
4843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                /* second bit */
4844b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                doCaseShift(cases, caseShift);
4845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                cases.LastByte() |= ((caseBits>>7)&1) << (--caseShift);
4846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
4851b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(notIsContinuation) {
4852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    tertiary ^= caseSwitch;
4853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
4854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
4855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            tertiary &= tertiaryMask;
4857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(tertiary > compareTer) {
4858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /* This is compression code. */
4859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                /* sequence size check is included in the if clause */
4860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (tertiary == tertiaryCommon && notIsContinuation) {
4861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    ++count3;
4862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
4863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
4864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        tertiary += tertiaryAddition;
4865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else if(tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
4866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        tertiary -= tertiaryAddition;
4867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
4868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (count3 > 0) {
4869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        if ((tertiary > tertiaryCommon)) {
4870b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            while (count3 > coll->tertiaryTopCount) {
4871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
4872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                count3 -= (uint32_t)coll->tertiaryTopCount;
4873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            }
4874b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            tertiaries.Append((uint8_t)(tertiaryTop - (count3-1)));
4875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        } else {
4876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            while (count3 > coll->tertiaryBottomCount) {
4877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
4878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                count3 -= (uint32_t)coll->tertiaryBottomCount;
4879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
4880b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
4881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4882b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        count3 = 0;
4883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4884b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    tertiaries.Append(tertiary);
4885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(/*qShifted*/(compareQuad==0)  && notIsContinuation) {
4889b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
4890b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(count4>0) { // Close this part
4891b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        while (count4 > UCOL_BOT_COUNT4) {
4892b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
4893b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            count4 -= UCOL_BOT_COUNT4;
4894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
4895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
4896b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        count4 = 0;
4897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4898b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    quads.Append(UCOL_HIRAGANA_QUAD); // Add the Hiragana
4899b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else { // This wasn't Hiragana, so we can continue adding stuff
4900b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    count4++;
4901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Here, we are generally done with processing */
4907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* bailing out would not be too productive */
4908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(*status)) {
4910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* we have done all the CE's, now let's put them together to form a key */
4911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(compareSec == 0) {
4912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (count2 > 0) {
4913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (count2 > UCOL_BOT_COUNT2) {
4914b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
4915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    count2 -= (uint32_t)UCOL_BOT_COUNT2;
4916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
4918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4919b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result.Append(UCOL_LEVELTERMINATOR);
4920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(!isFrenchSec || !secondaries.IsOk()) {
4921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result.Append(secondaries);
4922b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
4923b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // If there are any unresolved continuation secondaries,
4924b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // reverse them here so that we can reverse the whole secondary thing.
4925b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (lastSecondaryLength > 1) {
4926b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength);
4927b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (frenchStartPtr != NULL) {
4928b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        /* reverse secondaries from frenchStartPtr up to frenchEndPtr */
4929b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1;
4930b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
4931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4933b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                packFrench(secondaries.GetUnsignedBuffer(), secondaries.NumberOfBytesAppended(), result);
4934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(doCase) {
4938b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result.Append(UCOL_LEVELTERMINATOR);
4939b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result.Append(cases);
4940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(compareTer == 0) {
4943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (count3 > 0) {
4944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (coll->tertiaryCommon != UCOL_COMMON_BOT3) {
4945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count3 >= coll->tertiaryTopCount) {
4946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
4947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count3 -= (uint32_t)coll->tertiaryTopCount;
4948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    tertiaries.Append((uint8_t)(tertiaryTop - count3));
4950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count3 > coll->tertiaryBottomCount) {
4952b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
4953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count3 -= (uint32_t)coll->tertiaryBottomCount;
4954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4955b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
4956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4958b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result.Append(UCOL_LEVELTERMINATOR);
4959b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result.Append(tertiaries);
4960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(compareQuad == 0/*qShifted == TRUE*/) {
4962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(count4 > 0) {
4963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while (count4 > UCOL_BOT_COUNT4) {
4964b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
4965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        count4 -= UCOL_BOT_COUNT4;
4966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4967b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
4968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4969b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result.Append(UCOL_LEVELTERMINATOR);
4970b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result.Append(quads);
4971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(compareIdent) {
4974b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result.Append(UCOL_LEVELTERMINATOR);
4975b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                u_writeIdenticalLevelRun(s.string, len, result);
4976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4978b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.Append(0);
4979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* To avoid memory leak, free the offset buffer if necessary. */
4982b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucol_freeOffsetBuffer(&s);
4983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CFUNC void U_CALLCONV
4987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
4988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar        *source,
4989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t        sourceLength,
4990b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        SortKeyByteSink &result,
4991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode *status)
4992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
4993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ALIGN_CODE(16);
4994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status)) {
4996b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
4997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4999b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /* Stack allocated buffers for buffers we use */
5000b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER];
5001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5002b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink &primaries = result;
5003b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink secondaries(second, LENGTHOF(second));
5004b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    SortKeyByteSink tertiaries(tert, LENGTHOF(tert));
5005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
500650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString normSource;
5007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t len =  sourceLength;
5009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* If we need to normalize, we'll do it all at once at the beginning! */
501150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(coll->normalizationMode != UCOL_OFF) {
501250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        normSource.setTo(len < 0, source, len);
501350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const Normalizer2 *norm2 = Normalizer2Factory::getFCDInstance(*status);
501450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);
501550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(qcYesLength != normSource.length()) {
501650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeString unnormalized = normSource.tempSubString(qcYesLength);
501750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            normSource.truncate(qcYesLength);
501850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);
501950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            source = normSource.getBuffer();
502050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len = normSource.length();
5021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collIterate s;
502450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, (UChar *)source, len, &s, status);
502550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
5026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
502750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
502827f654740f2a26ad62a5c155af9199af9e69b889claireho    s.flags &= ~UCOL_ITER_NORM;  // source passed the FCD test or else was normalized.
5029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t order = 0;
5031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary1 = 0;
5033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t primary2 = 0;
5034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t secondary = 0;
5035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiary = 0;
5036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseSwitch = coll->caseSwitch;
5037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryMask = coll->tertiaryMask;
5038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int8_t tertiaryAddition = coll->tertiaryAddition;
5039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryTop = coll->tertiaryTop;
5040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryBottom = coll->tertiaryBottom;
5041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryCommon = coll->tertiaryCommon;
5042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool notIsContinuation = FALSE;
5044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t count2 = 0, count3 = 0;
5046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t leadPrimary = 0;
5047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(;;) {
5049b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        order = ucol_IGetNextCE(coll, &s, status);
5050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(order == 0) {
5052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            continue;
5053b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
5054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5055b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(order == UCOL_NO_MORE_CES) {
5056b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
5057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
5058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        notIsContinuation = !isContinuation(order);
5060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5061b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(notIsContinuation) {
5062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            tertiary = (uint8_t)((order & tertiaryMask));
5063b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
5064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
5065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
506627f654740f2a26ad62a5c155af9199af9e69b889claireho
5067b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
5068b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
5069b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        primary1 = (uint8_t)(order >> 8);
5070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5071b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uint8_t originalPrimary1 = primary1;
5072b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
5073b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            primary1 = coll->leadBytePermutationTable[primary1];
5074b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
507527f654740f2a26ad62a5c155af9199af9e69b889claireho
5076b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
5077b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will   */
5078b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above.               */
5079b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /* regular and simple sortkey calc */
5080b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(primary1 != UCOL_IGNORABLE) {
5081b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(notIsContinuation) {
5082b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(leadPrimary == primary1) {
5083b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    primaries.Append(primary2);
5084b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
5085b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(leadPrimary != 0) {
5086b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        primaries.Append((uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN));
5087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5088b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(primary2 == UCOL_IGNORABLE) {
5089b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        /* one byter, not compressed */
5090b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        primaries.Append(primary1);
5091b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        leadPrimary = 0;
5092b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else if(isCompressible(coll, originalPrimary1)) {
5093b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        /* compress */
5094b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        primaries.Append(leadPrimary = primary1, primary2);
5095b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
5096b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        leadPrimary = 0;
5097b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        primaries.Append(primary1, primary2);
5098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
5101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(primary2 == UCOL_IGNORABLE) {
5102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    primaries.Append(primary1);
5103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    primaries.Append(primary1, primary2);
5105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
5108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(secondary > 0) { /* I think that != 0 test should be != IGNORABLE */
5110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /* This is compression code. */
5111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (secondary == UCOL_COMMON2 && notIsContinuation) {
5112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++count2;
5113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
5114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (count2 > 0) {
5115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
5116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        while (count2 > UCOL_TOP_COUNT2) {
5117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
5118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            count2 -= (uint32_t)UCOL_TOP_COUNT2;
5119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
5121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
5122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        while (count2 > UCOL_BOT_COUNT2) {
5123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
5124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            count2 -= (uint32_t)UCOL_BOT_COUNT2;
5125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
5126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
5127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    count2 = 0;
5129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                secondaries.Append(secondary);
5131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
5133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(notIsContinuation) {
5135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            tertiary ^= caseSwitch;
5136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
5137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(tertiary > 0) {
5139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /* This is compression code. */
5140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            /* sequence size check is included in the if clause */
5141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (tertiary == tertiaryCommon && notIsContinuation) {
5142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++count3;
5143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
5144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
5145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    tertiary += tertiaryAddition;
5146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else if (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
5147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    tertiary -= tertiaryAddition;
5148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
5149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (count3 > 0) {
5150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if ((tertiary > tertiaryCommon)) {
5151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        while (count3 > coll->tertiaryTopCount) {
5152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
5153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            count3 -= (uint32_t)coll->tertiaryTopCount;
5154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
5155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        tertiaries.Append((uint8_t)(tertiaryTop - (count3-1)));
5156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        while (count3 > coll->tertiaryBottomCount) {
5158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
5159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            count3 -= (uint32_t)coll->tertiaryBottomCount;
5160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        }
5161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
5162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    count3 = 0;
5164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                tertiaries.Append(tertiary);
5166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(*status)) {
5171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* we have done all the CE's, now let's put them together to form a key */
5172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (count2 > 0) {
5173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while (count2 > UCOL_BOT_COUNT2) {
5174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
5175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                count2 -= (uint32_t)UCOL_BOT_COUNT2;
5176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
5178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.Append(UCOL_LEVELTERMINATOR);
5180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.Append(secondaries);
5181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (count3 > 0) {
5183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) {
5184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (count3 >= coll->tertiaryTopCount) {
5185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
5186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    count3 -= (uint32_t)coll->tertiaryTopCount;
5187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                tertiaries.Append((uint8_t)(tertiaryTop - count3));
5189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (count3 > coll->tertiaryBottomCount) {
5191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
5192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    count3 -= (uint32_t)coll->tertiaryBottomCount;
5193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
5195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.Append(UCOL_LEVELTERMINATOR);
5198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.Append(tertiaries);
5199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.Append(0);
5201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* To avoid memory leak, free the offset buffer if necessary. */
5204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucol_freeOffsetBuffer(&s);
5205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_SUCCESS(*status) && !result.IsOk()) {
5207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *status = U_BUFFER_OVERFLOW_ERROR;
5208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
5209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
5210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline
5212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) {
5213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool notIsContinuation = !isContinuation(CE);
5214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF);
521527f654740f2a26ad62a5c155af9199af9e69b889claireho    if((LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0)
521627f654740f2a26ad62a5c155af9199af9e69b889claireho               || (!notIsContinuation && *wasShifted)))
5217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || (*wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */
5218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
5219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // The stuff below should probably be in the sortkey code... maybe not...
5220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(primary1 != 0) { /* if we were shifted and we got an ignorable code point */
5221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            /* we should just completely ignore it */
5222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *wasShifted = TRUE;
5223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            //continue;
5224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //*wasShifted = TRUE;
5226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return TRUE;
5227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
5228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *wasShifted = FALSE;
5229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return FALSE;
5230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
5232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline
5233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *dest) {
5234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(level < maxLevel) {
5235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        dest[i++] = UCOL_LEVELTERMINATOR;
5236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
5237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        dest[i++] = 0;
5238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
5239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
5240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/** Level identifiers used by partial sort key generation. */
enum {
    UCOL_PSK_PRIMARY    = 0,
    UCOL_PSK_SECONDARY  = 1,
    UCOL_PSK_CASE       = 2,
    UCOL_PSK_TERTIARY   = 3,
    UCOL_PSK_QUATERNARY = 4,
    /** This is an extra level, not used - but we have three bits to blow */
    UCOL_PSK_QUIN       = 5,
    UCOL_PSK_IDENTICAL  = 6,
    /** Level for the end of the sort key. Will just produce zeros. */
    UCOL_PSK_NULL       = 7,
    UCOL_PSK_LIMIT
};
5253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Layout of the collation processing state stored in the state[1] field.
 * For every field, the *_SHIFT value is how far to shift the state right to
 * bring the field down to bit 0, and the *_MASK value is ANDed with the
 * shifted state to extract the field.
 */
enum {
    /** Level identifier: one of the UCOL_PSK_* level values. Three bits. */
    UCOL_PSK_LEVEL_SHIFT = 0,
    UCOL_PSK_LEVEL_MASK = 7,

    /**
     * Number of bytes of a primary or quaternary already written. Can be only
     * 0 or 1, since we get up to two bytes from primary or quaternary.
     * This field is also used to denote that the French secondary level is
     * finished.
     */
    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3,
    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,

    /** Was the last value shifted? Can be 0 or 1 (Boolean). */
    UCOL_PSK_WAS_SHIFTED_SHIFT = 4,
    UCOL_PSK_WAS_SHIFTED_MASK = 1,

    /**
     * How many French bytes have already been written (two bits).
     * When we do French we need to reverse secondary values. However,
     * continuations need to stay the same. So if you had abc1c2c3de, you need
     * to have edc1c2c3ba.
     */
    UCOL_PSK_USED_FRENCH_SHIFT = 5,
    UCOL_PSK_USED_FRENCH_MASK = 3,

    /** Bytes of a bocsu sequence already used on the identical level (two bits). */
    UCOL_PSK_BOCSU_BYTES_SHIFT = 7,
    UCOL_PSK_BOCSU_BYTES_MASK = 3,

    /**
     * Count of consumed CEs.
     * NOTE(review): this mask keeps 19 bits (bits 9..27 of state[1]), while the
     * ucol_nextSortKeyPart description lists the field as bits 9..31 - confirm
     * which is intended.
     */
    UCOL_PSK_CONSUMED_CES_SHIFT = 9,
    UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF
};
5279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Macro calculating the number of expansion CEs available: the distance from
// (s).toReturn to (s).CEpos. The argument is an object (not a pointer) with
// CEpos and toReturn members and is evaluated twice, so it must not have side
// effects. The expansion is fully parenthesized so that the result can be
// safely combined with other operators (==, !, *, ...) at the call site.
#define uprv_numAvailableExpCEs(s) ((s).CEpos - (s).toReturn)
5282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** main sortkey part procedure. On the first call,
5285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  you should pass in a collator, an iterator, empty state
5286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  state[0] == state[1] == 0, a buffer to hold results
5287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  number of bytes you need and an error code pointer.
5288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  Make sure your buffer is big enough to hold the wanted
5289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  number of sortkey bytes. I don't check.
5290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  The only meaningful status you can get back is
5291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  U_BUFFER_OVERFLOW_ERROR, which basically means that you
5292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  have been dealt a raw deal and that you probably won't
5293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  be able to use partial sortkey generation for this
5294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  particular combination of string and collator. This
5295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  is highly unlikely, but you should still check the error code.
5296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  Any other status means that you're not in a sane situation
5297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  anymore. After the first call, preserve state values and
5298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  use them on subsequent calls to obtain more bytes of a sortkey.
5299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  Use until the number of bytes written is smaller than the requested
5300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  number of bytes. Generated sortkey is not compatible with the
5301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  one generated by ucol_getSortKey, as we don't do any compression.
5302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  However, levels are still terminated by a 1 (one) and the sortkey
5303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  is terminated by a 0 (zero). Identical level is the same as in the
5304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  regular sortkey - internal bocu-1 implementation is used.
5305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  For the curious, although you cannot do much about this, here is
5306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  the structure of state words.
5307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  state[0] - iterator state. Depends on the iterator implementation,
5308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             but allows the iterator to continue where it stopped in
5309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             the last iteration.
5310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  state[1] - collation processing state. Here is the distribution
5311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             of the bits:
5312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   0, 1, 2 - level of the sortkey - primary, secondary, case, tertiary
5313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             quaternary, quin (we don't use this one), identical and
5314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             null (producing only zeroes - first one to terminate the
5315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             sortkey and subsequent to fill the buffer).
5316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   3       - byte count. Number of bytes written on the primary level.
5317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   4       - was shifted. Whether the previous iteration finished in the
5318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             shifted state.
5319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   5, 6    - French continuation bytes written. See the comment in the enum
5320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   7,8     - Bocsu bytes used. Number of bytes from a bocu sequence on
5321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             the identical level.
5322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *   9..31   - CEs consumed. Number of getCE or next32 operations performed
5323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *             since the last successful update of the iterator state.
5324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
5325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
5326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_nextSortKeyPart(const UCollator *coll,
5327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     UCharIterator *iter,
5328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     uint32_t state[2],
5329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     uint8_t *dest, int32_t count,
5330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                     UErrorCode *status)
5331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
5332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* error checking */
5333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(status==NULL || U_FAILURE(*status)) {
5334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
5335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
5337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( coll==NULL || iter==NULL ||
5338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        state==NULL ||
5339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        count<0 || (count>0 && dest==NULL)
5340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
5341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status=U_ILLEGAL_ARGUMENT_ERROR;
5342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UTRACE_EXIT_STATUS(status);
5343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
5344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
5347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  coll, iter, state[0], state[1], dest, count);
5348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count==0) {
5350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* nothing to do */
5351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UTRACE_EXIT_VALUE(0);
5352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
5353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /** Setting up situation according to the state we got from the previous iteration */
5355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The state of the iterator from the previous invocation
5356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t iterState = state[0];
5357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Has the last iteration ended in the shifted state
5358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_SHIFTED_MASK)?TRUE:FALSE;
5359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // What is the current level of the sortkey?
5360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK;
5361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Have we written only one byte from a two byte primary in the previous iteration?
5362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Also on secondary level - have we finished with the French secondary?
5363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK;
5364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // number of bytes in the continuation buffer for French
5365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK;
5366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Number of bytes already written from a bocsu sequence. Since
5367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // the longest bocsu sequence is 4 long, this can be up to 3.
5368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK_BOCSU_BYTES_MASK;
5369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Number of elements that need to be consumed in this iteration because
5370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // the iterator returned UITER_NO_STATE at the end of the last iteration,
5371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // so we had to save the last valid state.
5372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED_CES_MASK;
5373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /** values that depend on the collator attributes */
5375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // strength of the collator.
5376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status);
5377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // maximal level of the partial sortkey. Need to take whether case level is done
5378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t maxLevel = 0;
5379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(strength < UCOL_TERTIARY) {
5380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
5381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = UCOL_PSK_CASE;
5382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
5383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = strength;
5384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
5386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strength == UCOL_TERTIARY) {
5387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = UCOL_PSK_TERTIARY;
5388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(strength == UCOL_QUATERNARY) {
5389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = UCOL_PSK_QUATERNARY;
5390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else { // identical
5391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            maxLevel = UCOL_IDENTICAL;
5392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // value for the quaternary level if Hiragana is encountered. Used for JIS X 4061 collation
5395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t UCOL_HIRAGANA_QUAD =
5396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF;
5397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Boundary value that decides whether a CE is shifted or not
5398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0;
5399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Are we doing French collation?
5400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON);
5401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /** initializing the collation state */
5403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool notIsContinuation = FALSE;
5404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t CE = UCOL_NO_MORE_CES;
5405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    collIterate s;
540750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, NULL, -1, &s, status);
540850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
540950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTRACE_EXIT_STATUS(*status);
541050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
541150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
5412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.iterator = iter;
5413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.flags |= UCOL_USE_ITERATOR;
5414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This variable tells us whether we have produced some other levels in this iteration
5415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // before we moved to the identical level. In that case, we need to switch the
5416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // type of the iterator.
5417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool doingIdenticalFromStart = FALSE;
5418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Normalizing iterator
5419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The division for the array length may truncate the array size to
5420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
5421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // for all platforms anyway.
5422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
5423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UNormIterator *normIter = NULL;
5424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If the normalization is turned on for the collator and we are below identical level
5425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // we will use a FCD normalizing iterator
5426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && level < UCOL_PSK_IDENTICAL) {
5427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
5428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status);
5429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.flags &= ~UCOL_ITER_NORM;
5430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(U_FAILURE(*status)) {
5431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UTRACE_EXIT_STATUS(*status);
5432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return 0;
5433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(level == UCOL_PSK_IDENTICAL) {
5435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // for identical level, we need a NFD iterator. We need to instantiate it here, since we
5436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // will be updating the state - and this cannot be done on an ordinary iterator.
5437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
5438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
5439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.flags &= ~UCOL_ITER_NORM;
5440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(U_FAILURE(*status)) {
5441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UTRACE_EXIT_STATUS(*status);
5442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return 0;
5443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        doingIdenticalFromStart = TRUE;
5445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This is the tentative new state of the iterator. The problem
5448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // is that the iterator might return an undefined state, in
5449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // which case we should save the last valid state and increase
5450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // the iterator skip value.
5451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t newState = 0;
5452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First, we set the iterator to the last valid position
5454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // from the last iteration. This was saved in state[0].
5455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(iterState == 0) {
5456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* initial state */
5457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone) {
5458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            s.iterator->move(s.iterator, 0, UITER_LIMIT);
5459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
5460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            s.iterator->move(s.iterator, 0, UITER_START);
5461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
5463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* reset to previous state */
5464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s.iterator->setState(s.iterator, iterState, status);
5465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(U_FAILURE(*status)) {
5466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UTRACE_EXIT_STATUS(*status);
5467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return 0;
5468c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This variable tells us whether we can attempt to update the state
5474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // of iterator. Situations where we don't want to update iterator state
5475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // are the existence of expansion CEs that are not yet processed, and
5476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // finishing the case level without enough space in the buffer to insert
5477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // a level terminator.
5478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool canUpdateState = TRUE;
5479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Consume all the CEs that were consumed at the end of the previous
5481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // iteration without updating the iterator state. On identical level,
5482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // consume the code points.
5483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t counter = cces;
5484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(level < UCOL_PSK_IDENTICAL) {
5485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(counter-->0) {
5486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // If we're doing French and we are on the secondary level,
5487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // we go backwards.
5488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(level == UCOL_PSK_SECONDARY && doingFrench) {
5489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetPrevCE(coll, &s, status);
5490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &s, status);
5492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(CE==UCOL_NO_MORE_CES) {
5494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /* should not happen */
5495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                *status=U_INTERNAL_PROGRAM_ERROR;
5496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UTRACE_EXIT_STATUS(*status);
5497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return 0;
5498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(uprv_numAvailableExpCEs(s)) {
5500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                canUpdateState = FALSE;
5501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
5504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(counter-->0) {
5505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uiter_next32(s.iterator);
5506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // French secondary needs to know whether the iterator state of zero came from previous level OR
5510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // from a new invocation...
5511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool wasDoingPrimary = FALSE;
5512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // destination buffer byte counter. When this guy
5513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // gets to count, we're done with the iteration
5514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0;
5515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // used to count the zero bytes written after we
5516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // have finished with the sort key
5517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t j = 0;
5518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Hm.... I think we're ready to plunge in. Basic story is as following:
5521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // we have a fall through case based on level. This is used for initial
5522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // positioning on iteration start. Every level processor contains a
5523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // for(;;) which will be broken when we exhaust all the CEs. Other
5524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // way to exit is a goto saveState, which happens when we have filled
5525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // out our buffer.
5526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(level) {
5527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_PRIMARY:
5528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        wasDoingPrimary = TRUE;
5529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
5530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(i==count) {
5531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto saveState;
5532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We should save the state only if we
5534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // are sure that we are done with the
5535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // previous iterator state
5536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(canUpdateState && byteCountOrFrenchDone == 0) {
5537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                newState = s.iterator->getState(s.iterator);
5538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(newState != UITER_NO_STATE) {
5539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    iterState = newState;
5540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
5541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            CE = ucol_IGetNextCE(coll, &s, status);
5544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            cces++;
5545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(CE==UCOL_NO_MORE_CES) {
5546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Add the level separator
5547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                terminatePSKLevel(level, maxLevel, i, dest);
5548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                byteCountOrFrenchDone=0;
5549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Restart the iteration and move to the
5550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // second level
5551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                s.iterator->move(s.iterator, 0, UITER_START);
5552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                cces = 0;
5553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                level = UCOL_PSK_SECONDARY;
5554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
5555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
555627f654740f2a26ad62a5c155af9199af9e69b889claireho            if(!isContinuation(CE)){
555727f654740f2a26ad62a5c155af9199af9e69b889claireho                if(coll->leadBytePermutationTable != NULL){
555827f654740f2a26ad62a5c155af9199af9e69b889claireho                    CE = (coll->leadBytePermutationTable[CE>>24] << 24) | (CE & 0x00FFFFFF);
555927f654740f2a26ad62a5c155af9199af9e69b889claireho                }
556027f654740f2a26ad62a5c155af9199af9e69b889claireho            }
5561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!isShiftedCE(CE, LVT, &wasShifted)) {
5562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */
5563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE != 0) {
5564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(byteCountOrFrenchDone == 0) {
5565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // get the second byte of primary
5566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        dest[i++]=(uint8_t)(CE >> 8);
5567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        byteCountOrFrenchDone = 0;
5569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((CE &=0xff)!=0) {
5571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(i==count) {
5572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* overflow */
5573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            byteCountOrFrenchDone = 1;
5574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            cces--;
5575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto saveState;
5576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        dest[i++]=(uint8_t)CE;
5578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(uprv_numAvailableExpCEs(s)) {
5582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                canUpdateState = FALSE;
5583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
5584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                canUpdateState = TRUE;
5585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
5587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* fall through to next level */
5588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    case UCOL_PSK_SECONDARY:
5589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(strength >= UCOL_SECONDARY) {
5590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(!doingFrench) {
5591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for(;;) {
5592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(i == count) {
5593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto saveState;
5594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // We should save the state only if we
5596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // are sure that we are done with the
5597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // previous iterator state
5598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(canUpdateState) {
5599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        newState = s.iterator->getState(s.iterator);
5600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(newState != UITER_NO_STATE) {
5601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            iterState = newState;
5602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            cces = 0;
5603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = ucol_IGetNextCE(coll, &s, status);
5606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces++;
5607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE==UCOL_NO_MORE_CES) {
5608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Add the level separator
5609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        terminatePSKLevel(level, maxLevel, i, dest);
5610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        byteCountOrFrenchDone = 0;
5611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Restart the iteration and move to the
5612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // next level
5613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        s.iterator->move(s.iterator, 0, UITER_START);
5614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        cces = 0;
5615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        level = UCOL_PSK_CASE;
5616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
5617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isShiftedCE(CE, LVT, &wasShifted)) {
5619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE >>= 8; /* get secondary */
5620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(CE != 0) {
5621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++]=(uint8_t)CE;
5622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(uprv_numAvailableExpCEs(s)) {
5625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = FALSE;
5626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = TRUE;
5628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else { // French secondary processing
5631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint8_t frenchBuff[UCOL_MAX_BUFFER];
5632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                int32_t frenchIndex = 0;
5633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Here we are going backwards.
5634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // If the iterator is at the beginning, it should be
5635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // moved to end.
5636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(wasDoingPrimary) {
5637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    s.iterator->move(s.iterator, 0, UITER_LIMIT);
5638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
5639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for(;;) {
5641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(i == count) {
5642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto saveState;
5643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(canUpdateState) {
5645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        newState = s.iterator->getState(s.iterator);
5646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(newState != UITER_NO_STATE) {
5647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            iterState = newState;
5648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            cces = 0;
5649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE = ucol_IGetPrevCE(coll, &s, status);
5652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces++;
5653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE==UCOL_NO_MORE_CES) {
5654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Add the level separator
5655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        terminatePSKLevel(level, maxLevel, i, dest);
5656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        byteCountOrFrenchDone = 0;
5657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Restart the iteration and move to the next level
5658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        s.iterator->move(s.iterator, 0, UITER_START);
5659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        level = UCOL_PSK_CASE;
5660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
5661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(isContinuation(CE)) { // if it's a continuation, we want to save it and
5663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // reverse when we get a first non-continuation CE.
5664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE >>= 8;
5665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        frenchBuff[frenchIndex++] = (uint8_t)CE;
5666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else if(!isShiftedCE(CE, LVT, &wasShifted)) {
5667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE >>= 8; /* get secondary */
5668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(!frenchIndex) {
5669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(CE != 0) {
5670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                dest[i++]=(uint8_t)CE;
5671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
5673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            frenchBuff[frenchIndex++] = (uint8_t)CE;
5674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            frenchIndex -= usedFrench;
5675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            usedFrench = 0;
5676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while(i < count && frenchIndex) {
5677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                dest[i++] = frenchBuff[--frenchIndex];
5678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                usedFrench++;
5679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(uprv_numAvailableExpCEs(s)) {
5683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = FALSE;
5684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = TRUE;
5686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
5690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_CASE;
5691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
5693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_CASE:
5694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
5695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t caseShift = UCOL_CASE_SHIFT_START;
5696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint8_t caseByte = UCOL_CASE_BYTE_START;
5697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint8_t caseBits = 0;
5698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
570050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(caseShift <= UCOL_CASE_SHIFT_START);
5701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(i == count) {
5702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto saveState;
5703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // We should save the state only if we
5705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // are sure that we are done with the
5706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // previous iterator state
5707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(canUpdateState) {
5708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    newState = s.iterator->getState(s.iterator);
5709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(newState != UITER_NO_STATE) {
5710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        iterState = newState;
5711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        cces = 0;
5712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &s, status);
5715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                cces++;
5716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE==UCOL_NO_MORE_CES) {
5717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // On the case level we might have an unfinished
5718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // case byte. Add one if it's started.
5719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(caseShift != UCOL_CASE_SHIFT_START) {
5720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        dest[i++] = caseByte;
5721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
5723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // We have finished processing CEs on this level.
5724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // However, we don't know if we have enough space
5725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // to add a case level terminator.
5726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(i < count) {
5727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Add the level separator
5728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        terminatePSKLevel(level, maxLevel, i, dest);
5729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // Restart the iteration and move to the
5730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // next level
5731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        s.iterator->move(s.iterator, 0, UITER_START);
5732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        level = UCOL_PSK_TERTIARY;
5733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        canUpdateState = FALSE;
5735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
5737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isShiftedCE(CE, LVT, &wasShifted)) {
5740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 || strength > UCOL_PRIMARY)) {
5741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // do the case level if we need to do it. We don't want to calculate
5742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // case level for primary ignorables if we have only primary strength and case level
5743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // otherwise we would break well formedness of CEs
5744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
5745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        caseBits = (uint8_t)(CE & 0xC0);
5746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // this copies the case level logic from the
5747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // sort key generation code
5748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(CE != 0) {
574950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (caseShift == 0) {
575050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                dest[i++] = caseByte;
575150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                caseShift = UCOL_CASE_SHIFT_START;
575250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                caseByte = UCOL_CASE_BYTE_START;
575350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
5754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(coll->caseFirst == UCOL_UPPER_FIRST) {
5755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if((caseBits & 0xC0) == 0) {
5756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= 1 << (--caseShift);
5757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
5758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= 0 << (--caseShift);
5759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    /* second bit */
5760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(caseShift == 0) {
5761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        dest[i++] = caseByte;
5762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        caseShift = UCOL_CASE_SHIFT_START;
5763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        caseByte = UCOL_CASE_BYTE_START;
5764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
5765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= ((caseBits>>6)&1) << (--caseShift);
5766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
5767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else {
5768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                if((caseBits & 0xC0) == 0) {
5769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= 0 << (--caseShift);
5770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                } else {
5771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= 1 << (--caseShift);
5772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    /* second bit */
5773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(caseShift == 0) {
5774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        dest[i++] = caseByte;
5775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        caseShift = UCOL_CASE_SHIFT_START;
5776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        caseByte = UCOL_CASE_BYTE_START;
5777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
5778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    caseByte |= ((caseBits>>7)&1) << (--caseShift);
5779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
5780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Not sure this is correct for the case level - revisit
5786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(uprv_numAvailableExpCEs(s)) {
5787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = FALSE;
5788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = TRUE;
5790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
5793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_TERTIARY;
5794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
5796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_TERTIARY:
5797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(strength >= UCOL_TERTIARY) {
5798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
5799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(i == count) {
5800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto saveState;
5801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // We should save the state only if we
5803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // are sure that we are done with the
5804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // previous iterator state
5805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(canUpdateState) {
5806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    newState = s.iterator->getState(s.iterator);
5807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(newState != UITER_NO_STATE) {
5808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        iterState = newState;
5809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        cces = 0;
5810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &s, status);
5813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                cces++;
5814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE==UCOL_NO_MORE_CES) {
5815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Add the level separator
5816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    terminatePSKLevel(level, maxLevel, i, dest);
5817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    byteCountOrFrenchDone = 0;
5818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Restart the iteration and move to the
5819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // next level
5820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    s.iterator->move(s.iterator, 0, UITER_START);
5821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
5822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    level = UCOL_PSK_QUATERNARY;
5823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
5824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isShiftedCE(CE, LVT, &wasShifted)) {
5826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    notIsContinuation = !isContinuation(CE);
5827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(notIsContinuation) {
5829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
5830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE ^= coll->caseSwitch;
5831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE &= coll->tertiaryMask;
5832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
5833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
5834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE != 0) {
5837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        dest[i++]=(uint8_t)CE;
5838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(uprv_numAvailableExpCEs(s)) {
5841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = FALSE;
5842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = TRUE;
5844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
5847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // if we're not doing tertiary
5848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // skip to the end
5849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_NULL;
5850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
5852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_QUATERNARY:
5853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(strength >= UCOL_QUATERNARY) {
5854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
5855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(i == count) {
5856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto saveState;
5857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // We should save the state only if we
5859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // are sure that we are done with the
5860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // previous iterator state
5861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(canUpdateState) {
5862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    newState = s.iterator->getState(s.iterator);
5863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(newState != UITER_NO_STATE) {
5864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        iterState = newState;
5865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        cces = 0;
5866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = ucol_IGetNextCE(coll, &s, status);
5869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                cces++;
5870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE==UCOL_NO_MORE_CES) {
5871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Add the level separator
5872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    terminatePSKLevel(level, maxLevel, i, dest);
5873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //dest[i++] = UCOL_LEVELTERMINATOR;
5874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    byteCountOrFrenchDone = 0;
5875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Restart the iteration and move to the
5876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // next level
5877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    s.iterator->move(s.iterator, 0, UITER_START);
5878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
5879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    level = UCOL_PSK_QUIN;
5880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
5881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(CE==0)
5883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
5884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(isShiftedCE(CE, LVT, &wasShifted)) {
5885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE >>= 16; /* get primary */
5886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(CE != 0) {
5887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(byteCountOrFrenchDone == 0) {
5888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++]=(uint8_t)(CE >> 8);
5889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
5890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            byteCountOrFrenchDone = 0;
5891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if((CE &=0xff)!=0) {
5893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(i==count) {
5894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                /* overflow */
5895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                byteCountOrFrenchDone = 1;
5896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goto saveState;
5897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++]=(uint8_t)CE;
5899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    notIsContinuation = !isContinuation(CE);
5903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(notIsContinuation) {
5904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
5905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++] = UCOL_HIRAGANA_QUAD;
5906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
5907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            dest[i++] = 0xFF;
5908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
5909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(uprv_numAvailableExpCEs(s)) {
5912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = FALSE;
5913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    canUpdateState = TRUE;
5915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
5918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // if we're not doing quaternary
5919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // skip to the end
5920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_NULL;
5921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
5923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_QUIN:
5924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        level = UCOL_PSK_IDENTICAL;
5925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
5926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_IDENTICAL:
5927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(strength >= UCOL_IDENTICAL) {
5928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UChar32 first, second;
5929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            int32_t bocsuBytesWritten = 0;
5930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // We always need to do identical on
5931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // the NFD form of the string.
5932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(normIter == NULL) {
5933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // we arrived from the level below and
5934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // normalization was not turned on.
5935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // therefore, we need to make a fresh NFD iterator
5936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
5937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
5938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else if(!doingIdenticalFromStart) {
5939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // there is an iterator, but we did some other levels.
5940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // therefore, we have a FCD iterator - need to make
5941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // a NFD one.
5942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // normIter being at the beginning does not guarantee
5943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // that the underlying iterator is at the beginning
5944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                iter->move(iter, 0, UITER_START);
5945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
5946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // At this point we have a NFD iterator that is positioned
5948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // in the right place
5949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(U_FAILURE(*status)) {
5950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UTRACE_EXIT_STATUS(*status);
5951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return 0;
5952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            first = uiter_previous32(s.iterator);
5954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // maybe we're at the start of the string
5955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(first == U_SENTINEL) {
5956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                first = 0;
5957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uiter_next32(s.iterator);
5959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            j = 0;
5962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
5963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(i == count) {
5964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(j+1 < bocsuBytesWritten) {
5965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        bocsuBytesUsed = j+1;
5966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
5967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto saveState;
5968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // On identical level, we will always save
5971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // the state if we reach this point, since
5972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // we don't depend on getNextCE for content
5973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // all the content is in our buffer and we
5974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // already either stored the full buffer OR
5975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // otherwise we won't arrive here.
5976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                newState = s.iterator->getState(s.iterator);
5977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(newState != UITER_NO_STATE) {
5978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    iterState = newState;
5979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    cces = 0;
5980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                uint8_t buff[4];
5983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                second = uiter_next32(s.iterator);
5984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                cces++;
5985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // end condition for identical level
5987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(second == U_SENTINEL) {
5988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    terminatePSKLevel(level, maxLevel, i, dest);
5989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    level = UCOL_PSK_NULL;
5990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
5991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, second, buff);
5993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                first = second;
5994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
5995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                j = 0;
5996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(bocsuBytesUsed != 0) {
5997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    while(bocsuBytesUsed-->0) {
5998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        j++;
5999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(i < count && j < bocsuBytesWritten) {
6003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    dest[i++] = buff[j++];
6004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            level = UCOL_PSK_NULL;
6009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* fall through to next level */
6011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_PSK_NULL:
6012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        j = i;
6013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(j<count) {
6014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            dest[j++]=0;
6015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        break;
6017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
6018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_INTERNAL_PROGRAM_ERROR;
6019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_STATUS(*status);
6020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusaveState:
6024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Now we need to return stuff. First we want to see whether we have
6025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // done everything for the current state of iterator.
6026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(byteCountOrFrenchDone
6027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || canUpdateState == FALSE
6028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        || (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE)
6029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
6030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Any of above mean that the previous transaction
6031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // wasn't finished and that we should store the
6032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // previous iterator state.
6033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        state[0] = iterState;
6034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
6035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // The transaction is complete. We will continue in the next iteration.
6036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        state[0] = s.iterator->getState(s.iterator);
6037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cces = 0;
6038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Store the number of bocsu bytes written.
6040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) {
6041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_INDEX_OUTOFBOUNDS_ERROR;
6042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BYTES_SHIFT;
6044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Next we put in the level of comparison
6046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT);
6047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If we are doing French, we need to store whether we have just finished the French level
6049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(level == UCOL_PSK_SECONDARY && doingFrench) {
6050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        state[1] |= (((state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
6051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
6052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        state[1] |= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
6053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Was the latest CE shifted
6056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(wasShifted) {
6057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        state[1] |= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT;
6058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Check for cces overflow
6060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) {
6061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_INDEX_OUTOFBOUNDS_ERROR;
6062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Store cces
6064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    state[1] |= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SHIFT);
6065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Check for French overflow
6067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) {
6068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_INDEX_OUTOFBOUNDS_ERROR;
6069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Store number of bytes written in the French secondary continuation sequence
6071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    state[1] |= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENCH_SHIFT);
6072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If we have used normalizing iterator, get rid of it
6075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(normIter != NULL) {
6076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(normIter);
6077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* To avoid memory leak, free the offset buffer if necessary. */
6080b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucol_freeOffsetBuffer(&s);
6081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Return number of meaningful sortkey bytes.
6083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
6084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  dest,i, state[0], state[1]);
6085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_EXIT_VALUE(i);
6086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return i;
6087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
6090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Produce a bound for a given sortkey and a number of levels.
6091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
6092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
6093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getBound(const uint8_t       *source,
6094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             sourceLength,
6095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UColBoundMode       boundType,
6096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t            noOfLevels,
6097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint8_t             *result,
6098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             resultLength,
6099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode          *status)
6100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
6101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // consistency checks
6102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(status == NULL || U_FAILURE(*status)) {
6103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(source == NULL) {
6106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
6107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t sourceIndex = 0;
6111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Scan the string until we skip enough of the key OR reach the end of the key
6112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    do {
6113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sourceIndex++;
6114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
6115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            noOfLevels--;
6116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } while (noOfLevels > 0
6118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
6119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
6121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        && noOfLevels > 0) {
6122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_SORT_KEY_TOO_SHORT_WARNING;
6123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // READ ME: this code assumes that the values for boundType
6127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // enum will not changes. They are set so that the enum value
6128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // corresponds to the number of extra bytes each bound type
6129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // needs.
6130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(result != NULL && resultLength >= sourceIndex+boundType) {
6131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(result, source, sourceIndex);
6132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        switch(boundType) {
6133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Lower bound just gets terminated. No extra bytes
6134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case UCOL_BOUND_LOWER: // = 0
6135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
6136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Upper bound needs one extra byte
6137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case UCOL_BOUND_UPPER: // = 1
6138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result[sourceIndex++] = 2;
6139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
6140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Upper long bound needs two extra bytes
6141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case UCOL_BOUND_UPPER_LONG: // = 2
6142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result[sourceIndex++] = 0xFF;
6143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            result[sourceIndex++] = 0xFF;
6144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
6145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        default:
6146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR;
6147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return 0;
6148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result[sourceIndex++] = 0;
6150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return sourceIndex;
6152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
6153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return sourceIndex+boundType+1;
6154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
6158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the functions that deal with the properties of a collator  */
6159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there are new APIs and some compatibility APIs                           */
6160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
6161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void
6163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
6164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    int32_t *primShift, int32_t *secShift, int32_t *terShift)
6165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
6166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint8_t primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
6167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool reverseSecondary = FALSE;
616827f654740f2a26ad62a5c155af9199af9e69b889claireho    UBool continuation = isContinuation(CE);
616927f654740f2a26ad62a5c155af9199af9e69b889claireho    if(!continuation) {
6170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tertiary = (uint8_t)((CE & coll->tertiaryMask));
6171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tertiary ^= coll->caseSwitch;
6172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        reverseSecondary = TRUE;
6173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else {
6174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tertiary = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
6175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tertiary &= UCOL_REMOVE_CASE;
6176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        reverseSecondary = FALSE;
6177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    secondary = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK);
6180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    primary2 = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK);
6181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    primary1 = (uint8_t)(CE >> 8);
6182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(primary1 != 0) {
618427f654740f2a26ad62a5c155af9199af9e69b889claireho        if (coll->leadBytePermutationTable != NULL && !continuation) {
618527f654740f2a26ad62a5c155af9199af9e69b889claireho            primary1 = coll->leadBytePermutationTable[primary1];
618627f654740f2a26ad62a5c155af9199af9e69b889claireho        }
618727f654740f2a26ad62a5c155af9199af9e69b889claireho
6188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneCEs[ch] |= (primary1 << *primShift);
6189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *primShift -= 8;
6190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(primary2 != 0) {
6192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(*primShift < 0) {
6193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
6194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
6195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
6196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return;
6197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneCEs[ch] |= (primary2 << *primShift);
6199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *primShift -= 8;
6200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(secondary != 0) {
6202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse secondary
6203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space for secondary
6204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << 24);
6205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else { // normal case
6206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << *secShift);
6207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *secShift -= 8;
6209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(tertiary != 0) {
6211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneCEs[2*coll->latinOneTableLen+ch] |= (tertiary << *terShift);
6212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *terShift -= 8;
6213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool
6217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_resizeLatinOneTable(UCollator *coll, int32_t size, UErrorCode *status) {
6218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t *newTable = (uint32_t *)uprv_malloc(size*sizeof(uint32_t)*3);
6219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(newTable == NULL) {
6220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *status = U_MEMORY_ALLOCATION_ERROR;
6221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      coll->latinOneFailed = TRUE;
6222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return FALSE;
6223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sizeToCopy = ((size<coll->latinOneTableLen)?size:coll->latinOneTableLen)*sizeof(uint32_t);
6225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memset(newTable, 0, size*sizeof(uint32_t)*3);
6226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(newTable, coll->latinOneCEs, sizeToCopy);
6227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(newTable+size, coll->latinOneCEs+coll->latinOneTableLen, sizeToCopy);
6228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(newTable+2*size, coll->latinOneCEs+2*coll->latinOneTableLen, sizeToCopy);
6229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    coll->latinOneTableLen = size;
6230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_free(coll->latinOneCEs);
6231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    coll->latinOneCEs = newTable;
6232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
6233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool
6236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
6237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool result = TRUE;
6238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(coll->latinOneCEs == NULL) {
6239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*UCOL_LATINONETABLELEN*3);
6240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->latinOneCEs == NULL) {
6241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
6242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return FALSE;
6243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneTableLen = UCOL_LATINONETABLELEN;
6245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar ch = 0;
6247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UCollationElements *it = ucol_openElements(coll, &ch, 1, status);
6248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Check for null pointer
6249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(*status)) {
6250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return FALSE;
6251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)*coll->latinOneTableLen*3);
6253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t primShift = 24, secShift = 24, terShift = 24;
6255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t CE = 0;
6256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1;
6257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // TODO: make safe if you get more than you wanted...
6259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) {
6260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        primShift = 24; secShift = 24; terShift = 24;
6261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(ch < 0x100) {
6262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE = coll->latinOneMapping[ch];
6263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
6265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(CE == UCOL_NOT_FOUND && coll->UCA) {
6266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
6267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(CE < UCOL_NOT_FOUND) {
6270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
6271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            switch (getCETag(CE)) {
6273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            case EXPANSION_TAG:
6274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            case DIGIT_TAG:
6275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                ucol_setText(it, &ch, 1, status);
6276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) {
6277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(primShift < 0 || secShift < 0 || terShift < 0) {
6278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
6279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
6280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
6281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
6282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
6283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
6284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
6286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            case CONTRACTION_TAG:
6287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // here is the trick
6288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // F2 is contraction. We do something very similar to contractions
6289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // but have two indices, one in the real contraction table and the
6290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // other to where we stuffed things. This hopes that we don't have
6291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // many contractions (this should work for latin-1 tables).
6292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                {
6293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((CE & 0x00FFF000) != 0) {
6294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        *status = U_UNSUPPORTED_ERROR;
6295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto cleanup_after_failure;
6296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
6297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
6299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
6301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneCEs[ch] = CE;
6303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneCEs[coll->latinOneTableLen+ch] = CE;
6304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE;
6305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // We're going to jump into contraction table, pick the elements
6307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // and use them
6308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    do {
6309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        CE = *(coll->contractionCEs +
6310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            (UCharOffset - coll->contractionIndex));
6311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) {
6312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uint32_t size;
6313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uint32_t i;    /* general counter */
6314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            uint32_t *CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
6315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            size = getExpansionCount(CE);
6316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            //CE = *CEOffset++;
6317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
6318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                for(i = 0; i<size; i++) {
6319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(primShift < 0 || secShift < 0 || terShift < 0) {
6320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        break;
6324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
6325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
6326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
6327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            } else { /* else, we do */
6328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                while(*CEOffset != 0) {
6329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    if(primShift < 0 || secShift < 0 || terShift < 0) {
6330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                        break;
6334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    }
6335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                    ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
6336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                }
6337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            contractionOffset++;
6339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if(CE < UCOL_NOT_FOUND) {
6340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            ucol_addLatinOneEntry(coll, (UChar)contractionOffset++, CE, &primShift, &secShift, &terShift);
6341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
6342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
6345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            contractionOffset++;
6346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCharOffset++;
6348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        primShift = 24; secShift = 24; terShift = 24;
6349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate
6350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) {
6351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                goto cleanup_after_failure;
6352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
6354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } while(*UCharOffset != 0xFFFF);
6355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
6356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;;
6357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            case SPEC_PROC_TAG:
6358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                {
6359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // 0xB7 is a precontext character defined in UCA5.1, a special
6360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // handle is implemeted in order to save LatinOne table for
6361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // most locales.
6362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (ch==0xb7) {
6363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
6364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    else {
6366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto cleanup_after_failure;
6367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
6368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
6369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
6370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            default:
6371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto cleanup_after_failure;
6372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // compact table
6376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(contractionOffset < coll->latinOneTableLen) {
6377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) {
6378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            goto cleanup_after_failure;
6379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ucol_closeElements(it);
6382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return result;
6383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querucleanup_after_failure:
6385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // status should already be set before arriving here.
6386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    coll->latinOneFailed = TRUE;
6387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ucol_closeElements(it);
6388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return FALSE;
6389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid ucol_updateInternalState(UCollator *coll, UErrorCode *status) {
6392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_SUCCESS(*status)) {
6393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->caseFirst == UCOL_UPPER_FIRST) {
6394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->caseSwitch = UCOL_CASE_SWITCH;
6395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->caseSwitch = UCOL_NO_CASE_SWITCH;
6397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->caseLevel == UCOL_ON || coll->caseFirst == UCOL_OFF) {
6400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryMask = UCOL_REMOVE_CASE;
6401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
6402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF; /* Should be 0x80 */
6403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF;
6404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryBottom = UCOL_COMMON_BOT3;
6405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryMask = UCOL_KEEP_CASE;
6407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON;
6408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(coll->caseFirst == UCOL_UPPER_FIRST) {
6409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST;
6410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER;
6411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER;
6412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
6413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
6414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER;
6415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER;
6416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        /* Set the compression values */
6420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - coll->tertiaryBottom - 1);
6421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*tertiaryTotal); /* we multilply double with int, but need only int */
6422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopCount);
6423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY
6425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == UCOL_NON_IGNORABLE)
6426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
6427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->sortKeyGen = ucol_calcSortKeySimpleTertiary;
6428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->sortKeyGen = ucol_calcSortKey;
6430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && coll->numericCollation == UCOL_OFF
6432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed)
6433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
6434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) {
6435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it
6436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //fprintf(stderr, "F");
6437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneUse = TRUE;
6438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
6439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    coll->latinOneUse = FALSE;
6440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(*status == U_UNSUPPORTED_ERROR) {
6442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    *status = U_ZERO_ERROR;
6443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
6444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else { // latin1Table exists and it doesn't need to be regenerated, just use it
6445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                coll->latinOneUse = TRUE;
6446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
6447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->latinOneUse = FALSE;
6449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t  U_EXPORT2
6454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) {
6455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(len == -1) {
6459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        len = u_strlen(varTop);
6460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(len == 0) {
6462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
6463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collIterate s;
646750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, varTop, len, &s, status);
646850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
646950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
647050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
6471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t CE = ucol_IGetNextCE(coll, &s, status);
6473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* here we check if we have consumed all characters */
6475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* you can put in either one character or a contraction */
6476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* you shouldn't put more... */
6477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(s.pos != s.endp || CE == UCOL_NO_MORE_CES) {
6478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_CE_NOT_FOUND_ERROR;
6479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uint32_t nextCE = ucol_IGetNextCE(coll, &s, status);
6483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) {
6485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_PRIMARY_TOO_LONG_ERROR;
6486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) {
6489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->variableTopValueisDefault = FALSE;
6490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
6491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
6493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* To avoid memory leak, free the offset buffer if necessary. */
6494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucol_freeOffsetBuffer(&s);
6495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return CE & UCOL_PRIMARYMASK;
6497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
6500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return 0;
6502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return coll->variableTopValue<<16;
6504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void  U_EXPORT2
6507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
6508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
6510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) {
6513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->variableTopValueisDefault = FALSE;
6514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;
6515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Attribute setter API */
6518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void  U_EXPORT2
6519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
6520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return;
6522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6523b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UColAttributeValue oldFrench = coll->frenchCollation;
6525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UColAttributeValue oldCaseFirst = coll->caseFirst;
6526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(attr) {
6527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_NUMERIC_COLLATION: /* sort substrings of digits as numbers */
6528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(value == UCOL_ON) {
6529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollation = UCOL_ON;
6530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollationisDefault = FALSE;
6531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollation = UCOL_OFF;
6533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollationisDefault = FALSE;
6534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollationisDefault = TRUE;
6536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->numericCollation = (UColAttributeValue)coll->options->numericCollation;
6537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR;
6539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        break;
6541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_HIRAGANA_QUATERNARY_MODE: /* special quaternary values for Hiragana */
6542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(value == UCOL_ON) {
6543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQ = UCOL_ON;
6544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQisDefault = FALSE;
6545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQ = UCOL_OFF;
6547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQisDefault = FALSE;
6548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQisDefault = TRUE;
6550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->hiraganaQ = (UColAttributeValue)coll->options->hiraganaQ;
6551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
6552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR;
6553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        break;
6555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
6556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_ON) {
6557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollation = UCOL_ON;
6558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollationisDefault = FALSE;
6559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollation = UCOL_OFF;
6561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollationisDefault = FALSE;
6562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollationisDefault = TRUE;
6564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->frenchCollation = (UColAttributeValue)coll->options->frenchCollation;
6565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
6567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
6570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_SHIFTED) {
6571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandling = UCOL_SHIFTED;
6572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandlingisDefault = FALSE;
6573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_NON_IGNORABLE) {
6574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandling = UCOL_NON_IGNORABLE;
6575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandlingisDefault = FALSE;
6576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandlingisDefault = TRUE;
6578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->alternateHandling = (UColAttributeValue)coll->options->alternateHandling ;
6579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
6581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
6584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_LOWER_FIRST) {
6585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirst = UCOL_LOWER_FIRST;
6586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirstisDefault = FALSE;
6587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_UPPER_FIRST) {
6588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirst = UCOL_UPPER_FIRST;
6589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirstisDefault = FALSE;
6590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->caseFirst = UCOL_OFF;
6592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            coll->caseFirstisDefault = FALSE;
6593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirst = (UColAttributeValue)coll->options->caseFirst;
6595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseFirstisDefault = TRUE;
6596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
6598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_CASE_LEVEL: /* do we have an extra case level */
6601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_ON) {
6602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevel = UCOL_ON;
6603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevelisDefault = FALSE;
6604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevel = UCOL_OFF;
6606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevelisDefault = FALSE;
6607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevel = (UColAttributeValue)coll->options->caseLevel;
6609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->caseLevelisDefault = TRUE;
6610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
6612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
6615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(value == UCOL_ON) {
6616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationMode = UCOL_ON;
6617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationModeisDefault = FALSE;
661827f654740f2a26ad62a5c155af9199af9e69b889claireho            initializeFCD(status);
6619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_OFF) {
6620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationMode = UCOL_OFF;
6621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationModeisDefault = FALSE;
6622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value == UCOL_DEFAULT) {
6623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationModeisDefault = TRUE;
6624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->normalizationMode = (UColAttributeValue)coll->options->normalizationMode;
662527f654740f2a26ad62a5c155af9199af9e69b889claireho            if(coll->normalizationMode == UCOL_ON) {
662627f654740f2a26ad62a5c155af9199af9e69b889claireho                initializeFCD(status);
662727f654740f2a26ad62a5c155af9199af9e69b889claireho            }
6628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
6630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_STRENGTH:         /* attribute for strength */
6633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (value == UCOL_DEFAULT) {
6634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->strengthisDefault = TRUE;
6635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->strength = (UColAttributeValue)coll->options->strength;
6636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (value <= UCOL_IDENTICAL) {
6637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->strengthisDefault = FALSE;
6638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            coll->strength = value;
6639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
6640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
6641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_ATTRIBUTE_COUNT:
6644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
6645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
6646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) {
6649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneRegenTable = TRUE;
6650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
6651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        coll->latinOneRegenTable = FALSE;
6652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucol_updateInternalState(coll, status);
6654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UColAttributeValue  U_EXPORT2
6657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
6658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL) {
6659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return UCOL_DEFAULT;
6660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(attr) {
6662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_NUMERIC_COLLATION:
6663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return coll->numericCollation;
6664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_HIRAGANA_QUATERNARY_MODE:
6665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      return coll->hiraganaQ;
6666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
6667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->frenchCollation;
6668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
6669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->alternateHandling;
6670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
6671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->caseFirst;
6672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_CASE_LEVEL: /* do we have an extra case level */
6673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->caseLevel;
6674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
6675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->normalizationMode;
6676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_STRENGTH:         /* attribute for strength */
6677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return coll->strength;
6678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case UCOL_ATTRIBUTE_COUNT:
6679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
6680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
6681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
6682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return UCOL_DEFAULT;
6684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
6687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_setStrength(    UCollator                *coll,
6688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UCollationStrength        strength)
6689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
6690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
6691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
6692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationStrength U_EXPORT2
6695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getStrength(const UCollator *coll)
6696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
6697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
6698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
6699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6701b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_DRAFT int32_t U_EXPORT2
670227f654740f2a26ad62a5c155af9199af9e69b889clairehoucol_getReorderCodes(const UCollator *coll,
670327f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t *dest,
670427f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t destCapacity,
6705b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    UErrorCode *status) {
6706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(*status)) {
670727f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
670827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
6709b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
671027f654740f2a26ad62a5c155af9199af9e69b889claireho    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
6711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *status = U_ILLEGAL_ARGUMENT_ERROR;
671227f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
671327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
6714b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6715b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#ifdef UCOL_DEBUG
6716b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    printf("coll->reorderCodesLength = %d\n", coll->reorderCodesLength);
6717b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    printf("coll->defaultReorderCodesLength = %d\n", coll->defaultReorderCodesLength);
6718b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif
6719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
672027f654740f2a26ad62a5c155af9199af9e69b889claireho    if (coll->reorderCodesLength > destCapacity) {
6721b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *status = U_BUFFER_OVERFLOW_ERROR;
672227f654740f2a26ad62a5c155af9199af9e69b889claireho        return coll->reorderCodesLength;
672327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
672427f654740f2a26ad62a5c155af9199af9e69b889claireho    for (int32_t i = 0; i < coll->reorderCodesLength; i++) {
672527f654740f2a26ad62a5c155af9199af9e69b889claireho        dest[i] = coll->reorderCodes[i];
672627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
672727f654740f2a26ad62a5c155af9199af9e69b889claireho    return coll->reorderCodesLength;
672827f654740f2a26ad62a5c155af9199af9e69b889claireho}
672927f654740f2a26ad62a5c155af9199af9e69b889claireho
6730b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_DRAFT void U_EXPORT2
6731b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoucol_setReorderCodes(UCollator* coll,
6732b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    const int32_t* reorderCodes,
673327f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t reorderCodesLength,
6734b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    UErrorCode *status) {
6735b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(*status)) {
673627f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
673727f654740f2a26ad62a5c155af9199af9e69b889claireho    }
673827f654740f2a26ad62a5c155af9199af9e69b889claireho
673927f654740f2a26ad62a5c155af9199af9e69b889claireho    if (reorderCodesLength < 0 || (reorderCodesLength > 0 && reorderCodes == NULL)) {
6740b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *status = U_ILLEGAL_ARGUMENT_ERROR;
674127f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
674227f654740f2a26ad62a5c155af9199af9e69b889claireho    }
674327f654740f2a26ad62a5c155af9199af9e69b889claireho
6744b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) {
6745b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uprv_free(coll->reorderCodes);
6746b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
674727f654740f2a26ad62a5c155af9199af9e69b889claireho    coll->reorderCodes = NULL;
674827f654740f2a26ad62a5c155af9199af9e69b889claireho    coll->reorderCodesLength = 0;
674927f654740f2a26ad62a5c155af9199af9e69b889claireho    if (reorderCodesLength == 0) {
6750b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
6751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            uprv_free(coll->leadBytePermutationTable);
6752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
675327f654740f2a26ad62a5c155af9199af9e69b889claireho        coll->leadBytePermutationTable = NULL;
675427f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
675527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
675627f654740f2a26ad62a5c155af9199af9e69b889claireho    coll->reorderCodes = (int32_t*) uprv_malloc(reorderCodesLength * sizeof(int32_t));
675727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (coll->reorderCodes == NULL) {
6758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *status = U_MEMORY_ALLOCATION_ERROR;
675927f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
676027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
6761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    coll->freeReorderCodesOnClose = TRUE;
676227f654740f2a26ad62a5c155af9199af9e69b889claireho    for (int32_t i = 0; i < reorderCodesLength; i++) {
676327f654740f2a26ad62a5c155af9199af9e69b889claireho        coll->reorderCodes[i] = reorderCodes[i];
676427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
676527f654740f2a26ad62a5c155af9199af9e69b889claireho    coll->reorderCodesLength = reorderCodesLength;
6766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ucol_buildPermutationTable(coll, status);
6767b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
6768b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_DRAFT int32_t U_EXPORT2
6770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoucol_getEquivalentReorderCodes(int32_t reorderCode,
6771b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    int32_t* dest,
6772b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    int32_t destCapacity,
6773b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    UErrorCode *pErrorCode) {
6774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    bool equivalentCodesSet[USCRIPT_CODE_LIMIT];
6775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uint16_t leadBytes[256];
6776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int leadBytesCount;
6777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int leadByteIndex;
6778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int16_t reorderCodesForLeadByte[USCRIPT_CODE_LIMIT];
6779b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int reorderCodesForLeadByteCount;
6780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int reorderCodeIndex;
6781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6782b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t equivalentCodesCount = 0;
6783b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int setIndex;
6784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
678527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(*pErrorCode)) {
6786b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
6787b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
6788b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6789b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
6790b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
6791b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
6792b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
6793b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6794b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uprv_memset(equivalentCodesSet, 0, USCRIPT_CODE_LIMIT * sizeof(bool));
6795b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6796b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UCollator* uca = ucol_initUCA(pErrorCode);
6797b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(*pErrorCode)) {
6798b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho	return 0;
6799b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
6800b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    leadBytesCount = ucol_getLeadBytesForReorderCode(uca, reorderCode, leadBytes, 256);
6801b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for (leadByteIndex = 0; leadByteIndex < leadBytesCount; leadByteIndex++) {
6802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        reorderCodesForLeadByteCount = ucol_getReorderCodesForLeadByte(
6803b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            uca, leadBytes[leadByteIndex], reorderCodesForLeadByte, USCRIPT_CODE_LIMIT);
6804b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodesForLeadByteCount; reorderCodeIndex++) {
6805b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            equivalentCodesSet[reorderCodesForLeadByte[reorderCodeIndex]] = true;
6806b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
6807b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
6808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6809b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) {
6810b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (equivalentCodesSet[setIndex] == true) {
6811b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            equivalentCodesCount++;
6812b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
6813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
6814b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6815b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (destCapacity == 0) {
6816b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return equivalentCodesCount;
6817b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
6818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
6819b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    equivalentCodesCount = 0;
6820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) {
6821b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (equivalentCodesSet[setIndex] == true) {
6822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            dest[equivalentCodesCount++] = setIndex;
6823b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (equivalentCodesCount >= destCapacity) {
6824b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
6825b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
6826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
6827b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
6828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return equivalentCodesCount;
682927f654740f2a26ad62a5c155af9199af9e69b889claireho}
683027f654740f2a26ad62a5c155af9199af9e69b889claireho
683127f654740f2a26ad62a5c155af9199af9e69b889claireho
6832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
6833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are misc functions                                             */
6834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* there are new APIs and some compatibility APIs                           */
6835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
6836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
6838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getVersion(const UCollator* coll,
6839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UVersionInfo versionInfo)
6840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
6841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* RunTime version  */
6842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t rtVersion = UCOL_RUNTIME_VERSION;
6843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Builder version*/
6844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t bdVersion = coll->image->version[0];
6845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Charset Version. Need to get the version from cnv files
6847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * makeconv should populate cnv files with version and
6848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * an api has to be provided in ucnv.h to obtain this version
6849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
6850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t csVersion = 0;
6851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* combine the version info */
6853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t cmbVersion = (uint16_t)((rtVersion<<11) | (bdVersion<<6) | (csVersion));
6854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Tailoring rules */
6856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    versionInfo[0] = (uint8_t)(cmbVersion>>8);
6857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    versionInfo[1] = (uint8_t)cmbVersion;
6858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    versionInfo[2] = coll->image->version[1];
6859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(coll->UCA) {
6860b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        /* Include the minor number when getting the UCA version. (major & 1f) << 3 | (minor & 7) */
6861b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        versionInfo[3] = (coll->UCA->image->UCAVersion[0] & 0x1f) << 3 | (coll->UCA->image->UCAVersion[1] & 0x07);
6862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
6863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        versionInfo[3] = 0;
6864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This internal API checks whether a character is tailored or not */
6869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool  U_EXPORT2
6870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) {
6871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(*status) || coll == NULL || coll == coll->UCA) {
6872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
6873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t CE = UCOL_NOT_FOUND;
6876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *ContractionStart = NULL;
6877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(u < 0x100) { /* latin-1 */
6878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CE = coll->latinOneMapping[u];
6879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) {
6880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
6881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else { /* regular */
6883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u);
6884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(isContraction(CE)) {
6887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ContractionStart = (UChar *)coll->image+getContractOffset(CE);
6888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex));
6889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UBool)(CE != UCOL_NOT_FOUND);
6892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
6896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Following are the string compare functions                               */
6897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                          */
6898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/****************************************************************************/
6899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  ucol_checkIdent    internal function.  Does byte level string compare.   */
6902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                     Used by strcoll if strength == identical and strings  */
690350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*                     are otherwise equal.                                  */
6904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                           */
6905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                     Comparison must be done on NFD normalized strings.    */
6906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                     FCD is not good enough.                               */
6907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
6909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUCollationResult    ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status)
6910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
691150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // When we arrive here, we can have normal strings or UCharIterators. Currently they are both
691250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // of same type, but that doesn't really mean that it will stay that way.
6913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t            comparison;
6914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sColl->flags & UCOL_USE_ITERATOR) {
691650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // The division for the array length may truncate the array size to
691750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
691850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // for all platforms anyway.
691950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
692050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
6921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UNormIterator *sNIt = NULL, *tNIt = NULL;
6922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
6923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
6924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sColl->iterator->move(sColl->iterator, 0, UITER_START);
6925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tColl->iterator->move(tColl->iterator, 0, UITER_START);
6926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, status);
6927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, status);
6928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        comparison = u_strCompareIter(sIt, tIt, TRUE);
6929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(sNIt);
6930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(tNIt);
6931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
693250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t sLen      = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp - sColl->string) : -1;
693350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *sBuf = sColl->string;
693450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t tLen      = (tColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(tColl->endp - tColl->string) : -1;
693550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *tBuf = tColl->string;
6936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (normalize) {
6938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            *status = U_ZERO_ERROR;
693950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Note: We could use Normalizer::compare() or similar, but for short strings
694050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // which may not be in FCD it might be faster to just NFD them.
694150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather than
694250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // NFD'ing immediately might be faster for long strings,
694350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // but string comparison is usually done on relatively short strings.
694450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN) == 0, sBuf, sLen),
694550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  sColl->writableBuffer,
694650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  *status);
694750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN) == 0, tBuf, tLen),
694850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  tColl->writableBuffer,
694950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  *status);
695050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(U_FAILURE(*status)) {
695150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return UCOL_LESS;
6952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
695350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writableBuffer);
6954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
695550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE);
6956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
6957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (comparison < 0) {
6960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_LESS;
6961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (comparison == 0) {
6962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_EQUAL;
6963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* comparison > 0 */ {
6964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_GREATER;
6965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*  CEBuf - a struct and inline helpers for saving CEs in a buffer     */
/*          within ucol_strcoll                                        */

#define UCOL_CEBUF_SIZE 512
typedef struct ucol_CEBuf {
    uint32_t    *buf;                           /* start of the storage in use         */
    uint32_t    *endp;                          /* one past the end of that storage    */
    uint32_t    *pos;                           /* current position within the storage */
    uint32_t     localArray[UCOL_CEBUF_SIZE];   /* built-in storage used initially     */
} ucol_CEBuf;


/* Points an empty buffer at its built-in array of UCOL_CEBUF_SIZE elements. */
static
inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
    uint32_t *storage = b->localArray;

    b->pos  = storage;
    b->buf  = storage;
    b->endp = storage + UCOL_CEBUF_SIZE;
}
6985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
6987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) {
6988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t  oldSize;
6989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t  newSize;
6990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t  *newBuf;
6991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ci->flags |= UCOL_ITER_ALLOCATED;
699350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    oldSize = (uint32_t)(b->pos - b->buf);
6994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    newSize = oldSize * 2;
6995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t));
6996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(newBuf == NULL) {
6997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_MEMORY_ALLOCATION_ERROR;
6998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
6999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    else {
7000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t));
7001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (b->buf != b->localArray) {
7002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uprv_free(b->buf);
7003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        b->buf = newBuf;
7005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        b->endp = b->buf + newSize;
7006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        b->pos  = b->buf + oldSize;
7007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic
7011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruinline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCode *status) {
7012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (b->pos == b->endp) {
7013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ucol_CEBuf_Expand(b, ci, status);
7014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_SUCCESS(*status)) {
7016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *(b)->pos++ = ce;
7017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is a trick string compare function that goes in and uses sortkeys to compare */
7021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* It is used when compare gets in trouble and needs to bail out                     */
7022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
7023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                  collIterate *tColl,
7024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                  UErrorCode *status)
7025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
7026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER];
7027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *sourceKeyP = sourceKey;
7028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *targetKeyP = targetKey;
7029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sourceKeyLen = UCOL_MAX_BUFFER, targetKeyLen = UCOL_MAX_BUFFER;
7030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UCollator *coll = sColl->coll;
703150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *source = NULL;
703250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *target = NULL;
7033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result = UCOL_EQUAL;
703450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString sourceString, targetString;
703550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t sourceLength;
703650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t targetLength;
7037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(sColl->flags & UCOL_USE_ITERATOR) {
7039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sColl->iterator->move(sColl->iterator, 0, UITER_START);
7040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        tColl->iterator->move(tColl->iterator, 0, UITER_START);
704150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
704250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while((c=sColl->iterator->next(sColl->iterator))>=0) {
704350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            sourceString.append((UChar)c);
704450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
704550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while((c=tColl->iterator->next(tColl->iterator))>=0) {
704650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            targetString.append((UChar)c);
704750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
704850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        source = sourceString.getBuffer();
704950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sourceLength = sourceString.length();
705050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        target = targetString.getBuffer();
705150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        targetLength = targetString.length();
7052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else { // no iterators
705350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sColl->string):-1;
705450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tColl->string):-1;
7055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source = sColl->string;
7056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        target = tColl->string;
7057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
7062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(sourceKeyLen > UCOL_MAX_BUFFER) {
7063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sourceKeyP = (uint8_t*)uprv_malloc(sourceKeyLen*sizeof(uint8_t));
7064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(sourceKeyP == NULL) {
7065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
7066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto cleanup_and_do_compare;
7067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
7069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
7072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(targetKeyLen > UCOL_MAX_BUFFER) {
7073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        targetKeyP = (uint8_t*)uprv_malloc(targetKeyLen*sizeof(uint8_t));
7074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(targetKeyP == NULL) {
7075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *status = U_MEMORY_ALLOCATION_ERROR;
7076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto cleanup_and_do_compare;
7077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
7079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result = uprv_strcmp((const char*)sourceKeyP, (const char*)targetKeyP);
7082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querucleanup_and_do_compare:
7084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(sourceKeyP != NULL && sourceKeyP != sourceKey) {
7085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(sourceKeyP);
7086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(targetKeyP != NULL && targetKeyP != targetKey) {
7089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(targetKeyP);
7090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(result<0) {
7093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_LESS;
7094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(result>0) {
7095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_GREATER;
7096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
7097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UCOL_EQUAL;
7098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
710250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult
710350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status)
7104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
7105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ALIGN_CODE(16);
7106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UCollator *coll = sColl->coll;
7108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // setting up the collator parameters
7111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UColAttributeValue strength = coll->strength;
7112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool initialCheckSecTer = (strength  >= UCOL_SECONDARY);
7113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkSecTer = initialCheckSecTer;
7115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkTertiary = (strength  >= UCOL_TERTIARY);
7116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkQuad = (strength  >= UCOL_QUATERNARY);
7117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkIdent = (strength == UCOL_IDENTICAL);
7118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool checkCase = (coll->caseLevel == UCOL_ON);
7119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer;
7120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
7121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool qShifted = shifted && checkQuad;
7122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad;
7123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(doHiragana && shifted) {
7125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return (ucol_compareUsingSortKeys(sColl, tColl, status));
7126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t caseSwitch = coll->caseSwitch;
7128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t tertiaryMask = coll->tertiaryMask;
7129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This is the lowest primary value that will not be ignored if shifted
7131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0;
7132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollationResult result = UCOL_EQUAL;
7134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCollationResult hirResult = UCOL_EQUAL;
7135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Preparing the CE buffers. They will be filled during the primary phase
7137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucol_CEBuf   sCEs;
7138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucol_CEBuf   tCEs;
7139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCOL_INIT_CEBUF(&sCEs);
7140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCOL_INIT_CEBUF(&tCEs);
7141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t secS = 0, secT = 0;
7143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sOrder=0, tOrder=0;
7144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Non shifted primary processing is quite simple
7146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!shifted) {
7147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
7148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // We fetch CEs until we hit a non ignorable primary or end.
7150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            do {
7151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // We get the next CE
7152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder = ucol_IGetNextCE(coll, sColl, status);
7153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Stuff it in the buffer
7154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // And keep just the primary part.
7156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sOrder &= UCOL_PRIMARYMASK;
7157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } while(sOrder == 0);
7158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // see the comments on the above block
7160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            do {
7161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder = ucol_IGetNextCE(coll, tColl, status);
7162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder &= UCOL_PRIMARYMASK;
7164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } while(tOrder == 0);
7165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // if both primaries are the same
7167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sOrder == tOrder) {
7168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // and there are no more CEs, we advance to the next level
7169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
7170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(doHiragana && hirResult == UCOL_EQUAL) {
7173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) {
7174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA))
7175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            ? UCOL_LESS:UCOL_GREATER;
7176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
717927f654740f2a26ad62a5c155af9199af9e69b889claireho                // only need to check one for continuation
718027f654740f2a26ad62a5c155af9199af9e69b889claireho                // if one is then the other must be or the preceding CE would be a prefix of the other
718127f654740f2a26ad62a5c155af9199af9e69b889claireho                if (coll->leadBytePermutationTable != NULL && !isContinuation(sOrder)) {
718227f654740f2a26ad62a5c155af9199af9e69b889claireho                    sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
718327f654740f2a26ad62a5c155af9199af9e69b889claireho                    tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
718427f654740f2a26ad62a5c155af9199af9e69b889claireho                }
7185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // if two primaries are different, we are done
7186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = (sOrder < tOrder) ?  UCOL_LESS: UCOL_GREATER;
7187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } // no primary difference... do the rest from the buffers
7190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // shifted - do a slightly more complicated processing :)
7191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
7192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UBool sInShifted = FALSE;
7193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UBool tInShifted = FALSE;
7194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // This version of code can be refactored. However, it seems easier to understand this way.
7195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Source loop. Sam as the target loop.
7196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder = ucol_IGetNextCE(coll, sColl, status);
7198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == UCOL_NO_MORE_CES) {
7199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
7202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* UCA amendment - ignore ignorables that follow shifted code points */
7203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(isContinuation(sOrder)) {
7205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
7206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(sInShifted) {
7207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
7208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            break;
7213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else { /* Just lower level values */
7215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(sInShifted) {
7216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* regular */
722327f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(coll->leadBytePermutationTable != NULL){
722427f654740f2a26ad62a5c155af9199af9e69b889claireho                        sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
722527f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
7226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((sOrder & UCOL_PRIMARYMASK) > LVT) {
7227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if((sOrder & UCOL_PRIMARYMASK) > 0) {
7231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sInShifted = TRUE;
7232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sOrder &= UCOL_PRIMARYMASK;
7233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
7237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sInShifted = FALSE;
7238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sOrder &= UCOL_PRIMARYMASK;
7244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sInShifted = FALSE;
7245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder = ucol_IGetNextCE(coll, tColl, status);
7248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tOrder == UCOL_NO_MORE_CES) {
7249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
7252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    /* UCA amendment - ignore ignorables that follow shifted code points */
7253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(isContinuation(tOrder)) {
7255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
7256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(tInShifted) {
7257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
7258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            break;
7263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else { /* Just lower level values */
7265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(tInShifted) {
7266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else { /* regular */
727327f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(coll->leadBytePermutationTable != NULL){
727427f654740f2a26ad62a5c155af9199af9e69b889claireho                        tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
727527f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
7276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if((tOrder & UCOL_PRIMARYMASK) > LVT) {
7277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if((tOrder & UCOL_PRIMARYMASK) > 0) {
7281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tInShifted = TRUE;
7282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tOrder &= UCOL_PRIMARYMASK;
7283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
7287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tInShifted = FALSE;
7288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tOrder &= UCOL_PRIMARYMASK;
7294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tInShifted = FALSE;
7295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sOrder == tOrder) {
7297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                /*
7298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(doHiragana && hirResult == UCOL_EQUAL) {
7299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_HIRAGANA)) {
7300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA))
7301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                ? UCOL_LESS:UCOL_GREATER;
7302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                */
7305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
7306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = 0;
7309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = 0;
7310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
7313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER;
7314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } /* no primary difference... do the rest from the buffers */
7317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* now, we're gonna reexamine collected CEs */
7320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t    *sCE;
7321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t    *tCE;
7322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* This is the secondary level of comparison */
7324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(checkSecTer) {
7325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(!isFrenchSec) { /* normal */
7326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sCE = sCEs.buf;
7327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tCE = tCEs.buf;
7328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (secS == 0) {
7330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = *(sCE++) & UCOL_SECONDARYMASK;
7331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(secT == 0) {
7334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT = *(tCE++) & UCOL_SECONDARYMASK;
7335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(secS == secT) {
7338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(secS == UCOL_NO_MORE_CES_SECONDARY) {
7339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = 0; secT = 0;
7342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        continue;
7343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
7346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto commonReturn;
7347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else { /* do the French */
7350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t *sCESave = NULL;
7351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t *tCESave = NULL;
7352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sCE = sCEs.pos-2; /* this could also be sCEs-- if needs to be optimized */
7353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tCE = tCEs.pos-2;
7354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while (secS == 0 && sCE >= sCEs.buf) {
735627f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(sCESave == NULL) {
7357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = *(sCE--);
7358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(isContinuation(secS)) {
7359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while(isContinuation(secS = *(sCE--)))
7360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                ;
7361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* after this, secS has the start of continuation, and sCEs points before that */
7362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sCESave = sCE; /* we save it, so that we know where to come back AND that we need to go forward */
7363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sCE+=2;  /* need to point to the first continuation CP */
7364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* However, now you can just continue doing stuff */
7365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = *(sCE++);
7368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(!isContinuation(secS)) { /* This means we have finished with this cont */
7369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            sCE = sCESave;            /* reset the pointer to before continuation */
737027f654740f2a26ad62a5c155af9199af9e69b889claireho                            sCESave = NULL;
737127f654740f2a26ad62a5c155af9199af9e69b889claireho                            secS = 0;  /* Fetch a fresh CE before the continuation sequence. */
7372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS &= UCOL_SECONDARYMASK; /* remove the continuation bit */
7376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(secT == 0 && tCE >= tCEs.buf) {
737927f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(tCESave == NULL) {
7380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT = *(tCE--);
7381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(isContinuation(secT)) {
7382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            while(isContinuation(secT = *(tCE--)))
7383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                ;
7384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* after this, secS has the start of continuation, and sCEs points before that */
7385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tCESave = tCE; /* we save it, so that we know where to come back AND that we need to go forward */
7386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tCE+=2;  /* need to point to the first continuation CP */
7387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            /* However, now you can just continue doing stuff */
7388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT = *(tCE++);
7391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(!isContinuation(secT)) { /* This means we have finished with this cont */
7392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            tCE = tCESave;          /* reset the pointer to before continuation */
739327f654740f2a26ad62a5c155af9199af9e69b889claireho                            tCESave = NULL;
739427f654740f2a26ad62a5c155af9199af9e69b889claireho                            secT = 0;  /* Fetch a fresh CE before the continuation sequence. */
7395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            continue;
7396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT &= UCOL_SECONDARYMASK; /* remove the continuation bit */
7399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(secS == secT) {
7402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(secS == UCOL_NO_MORE_CES_SECONDARY || (sCE < sCEs.buf && tCE < tCEs.buf)) {
7403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = 0; secT = 0;
7406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        continue;
7407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
7410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto commonReturn;
7411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* doing the case bit */
7417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(checkCase) {
7418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sCE = sCEs.buf;
7419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tCE = tCEs.buf;
7420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
7421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while((secS & UCOL_REMOVE_CASE) == 0) {
7422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isContinuation(*sCE++)) {
7423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS =*(sCE-1);
7424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(((secS & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
7425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // primary ignorables should not be considered on the case level when the strength is primary
7426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // otherwise, the CEs stop being well-formed
7427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS &= UCOL_TERT_CASE_MASK;
7428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS ^= caseSwitch;
7429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secS = 0;
7431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = 0;
7434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while((secT & UCOL_REMOVE_CASE) == 0) {
7438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isContinuation(*tCE++)) {
7439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT = *(tCE-1);
7440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(((secT & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
7441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // primary ignorables should not be considered on the case level when the strength is primary
7442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // otherwise, the CEs stop being well-formed
7443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT &= UCOL_TERT_CASE_MASK;
7444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT ^= caseSwitch;
7445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        secT = 0;
7447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT = 0;
7450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) {
7454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = UCOL_LESS;
7455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK)) {
7457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = UCOL_GREATER;
7458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY || (secT & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) {
7462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
7463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secS = 0;
7465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secT = 0;
7466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Tertiary level */
7471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(checkTertiary) {
7472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secS = 0;
7473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secT = 0;
7474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sCE = sCEs.buf;
7475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tCE = tCEs.buf;
7476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
7477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while((secS & UCOL_REMOVE_CASE) == 0) {
7478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secS = *(sCE++) & tertiaryMask;
7479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isContinuation(secS)) {
7480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS ^= caseSwitch;
7481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS &= UCOL_REMOVE_CASE;
7483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while((secT & UCOL_REMOVE_CASE)  == 0) {
7487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secT = *(tCE++) & tertiaryMask;
7488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(!isContinuation(secT)) {
7489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT ^= caseSwitch;
7490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT &= UCOL_REMOVE_CASE;
7492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(secS == secT) {
7496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if((secS & UCOL_REMOVE_CASE) == 1) {
7497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = 0; secT = 0;
7500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
7504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(qShifted /*checkQuad*/) {
7511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UBool sInShifted = TRUE;
7512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UBool tInShifted = TRUE;
7513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secS = 0;
7514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        secT = 0;
7515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sCE = sCEs.buf;
7516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tCE = tCEs.buf;
7517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
751827f654740f2a26ad62a5c155af9199af9e69b889claireho            while((secS == 0 && secS != UCOL_NO_MORE_CES) || (isContinuation(secS) && !sInShifted)) {
7519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secS = *(sCE++);
7520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(isContinuation(secS)) {
7521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!sInShifted) {
7522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        continue;
7523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(secS > LVT || (secS & UCOL_PRIMARYMASK) == 0) { /* non continuation */
7525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = UCOL_PRIMARYMASK;
7526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sInShifted = FALSE;
7527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sInShifted = TRUE;
7529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            secS &= UCOL_PRIMARYMASK;
7532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
753427f654740f2a26ad62a5c155af9199af9e69b889claireho            while((secT == 0 && secT != UCOL_NO_MORE_CES) || (isContinuation(secT) && !tInShifted)) {
7535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                secT = *(tCE++);
7536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(isContinuation(secT)) {
7537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(!tInShifted) {
7538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        continue;
7539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(secT > LVT || (secT & UCOL_PRIMARYMASK) == 0) {
7541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secT = UCOL_PRIMARYMASK;
7542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tInShifted = FALSE;
7543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tInShifted = TRUE;
7545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            secT &= UCOL_PRIMARYMASK;
7548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(secS == secT) {
7550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(secS == UCOL_NO_MORE_CES_PRIMARY) {
7551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    secS = 0; secT = 0;
7554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
7558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto commonReturn;
7559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(doHiragana && hirResult != UCOL_EQUAL) {
7562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // If we're fine on quaternaries, we might be different
7563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // on Hiragana. This, however, might fail us in shifted.
7564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result = hirResult;
7565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        goto commonReturn;
7566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*  For IDENTICAL comparisons, we use a bitwise character comparison */
7569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*  as a tiebreaker if all else is equal.                                */
7570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*  Getting here  should be quite rare - strings are not identical -     */
7571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*     that is checked first, but compared == through all other checks.  */
7572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(checkIdent)
7573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
7574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //result = ucol_checkIdent(&sColl, &tColl, coll->normalizationMode == UCOL_ON);
7575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = ucol_checkIdent(sColl, tColl, TRUE, status);
7576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucommonReturn:
7579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((sColl->flags | tColl->flags) & UCOL_ITER_ALLOCATED) {
7580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (sCEs.buf != sCEs.localArray ) {
7581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(sCEs.buf);
7582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (tCEs.buf != tCEs.localArray ) {
7584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(tCEs.buf);
7585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
7589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
759150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult
759250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoucol_strcollRegular(const UCollator *coll,
759350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *source, int32_t sourceLength,
759450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *target, int32_t targetLength,
759550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UErrorCode *status) {
759650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    collIterate sColl, tColl;
759750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Preparing the context objects for iterating over strings
759850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, source, sourceLength, &sColl, status);
759950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, target, targetLength, &tColl, status);
760050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
760150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return UCOL_LESS;
760250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
760350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return ucol_strcollRegular(&sColl, &tColl, status);
760450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
7605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline uint32_t
7607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
7608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                          uint32_t CE, const UChar *s, int32_t *index, int32_t len)
7609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
7610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);
7611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t latinOneOffset = (CE & 0x00FFF000) >> 12;
7612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t offset = 1;
7613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar schar = 0, tchar = 0;
7614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for(;;) {
7616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(len == -1) {
7617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(s[*index] == 0) { // end of string
7618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
7619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                schar = s[*index];
7621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
7623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(*index == len) {
7624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
7625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                schar = s[*index];
7627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(schar > (tchar = *(UCharOffset+offset))) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
7631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            offset++;
7632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (schar == tchar) {
7635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            (*index)++;
7636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset+offset]);
7637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        else
7639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
7640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(schar & 0xFF00 /*> UCOL_ENDOFLATIN1RANGE*/) {
7641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return UCOL_BAIL_OUT_CE;
7642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // skip completely ignorables
7644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
7645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(isZeroCE == 0) { // we have to ignore completely ignorables
7646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                (*index)++;
7647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                continue;
7648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
7651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
7657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is a fast strcoll, geared towards text in Latin-1.
7658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It supports contractions of size two, French secondaries
7659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and case switching. You can use it with strengths primary
7660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to tertiary. It does not support shifted and case level.
7661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It relies on the table built by setupLatin1Table. If it
7662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * doesn't understand something, it will go to the regular
7663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * strcoll.
7664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
766550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UCollationResult
7666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcollUseLatin1( const UCollator    *coll,
7667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UChar        *source,
7668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t            sLen,
7669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UChar        *target,
7670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t            tLen,
7671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode *status)
7672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
7673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ALIGN_CODE(16);
7674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t strength = coll->strength;
7675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sIndex = 0, tIndex = 0;
7677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar sChar = 0, tChar = 0;
7678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sOrder=0, tOrder=0;
7679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool endOfSource = FALSE;
7681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t *elements = coll->latinOneCEs;
7683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool haveContractions = FALSE; // if we have contractions in our string
7685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    // we cannot do French secondary
7686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Do the primary level
7688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(;;) {
7689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(sOrder==0) { // this loop skips primary ignorables
7690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // sOrder=getNextlatinOneCE(source);
7691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sLen==-1) {   // handling zero terminated strings
7692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sChar=source[sIndex++];
7693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sChar==0) {
7694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endOfSource = TRUE;
7695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {        // handling strings with known length
7698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sIndex==sLen) {
7699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endOfSource = TRUE;
7700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sChar=source[sIndex++];
7703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
7705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //fprintf(stderr, "R");
770650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
7707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sOrder = elements[sChar];
7709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sOrder >= UCOL_NOT_FOUND) { // if we got a special
7710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // specials can basically be either contractions or bail-out signs. If we get anything
7711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // else, we'll bail out anywasy
7712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(getCETag(sOrder) == CONTRACTION_TAG) {
7713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOrder, source, &sIndex, sLen);
7714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    haveContractions = TRUE; // if there are contractions, we cannot do French secondary
7715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // However, if there are contractions in the table, but we always use just one char,
7716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // we might be able to do French. This should be checked out.
7717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) {
7719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //fprintf(stderr, "S");
772050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
7721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while(tOrder==0) {  // this loop skips primary ignorables
7726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // tOrder=getNextlatinOneCE(target);
7727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(tLen==-1) {    // handling zero terminated strings
7728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tChar=target[tIndex++];
7729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tChar==0) {
7730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(endOfSource) { // this is different than source loop,
7731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // as we already know that source loop is done here,
7732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // so we can either finish the primary loop if both
7733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // strings are done or anounce the result if only
7734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        // target is done. Same below.
7735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto endOfPrimLoop;
7736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_GREATER;
7738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {          // handling strings with known length
7741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tIndex==tLen) {
7742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(endOfSource) {
7743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        goto endOfPrimLoop;
7744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_GREATER;
7746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tChar=target[tIndex++];
7749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
7751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //fprintf(stderr, "R");
775250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
7753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tOrder = elements[tChar];
7755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(tOrder >= UCOL_NOT_FOUND) {
7756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Handling specials, see the comments for source
7757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(getCETag(tOrder) == CONTRACTION_TAG) {
7758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOrder, target, &tIndex, tLen);
7759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    haveContractions = TRUE;
7760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) {
7762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //fprintf(stderr, "S");
776350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
7764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(endOfSource) { // source is finished, but target is not, say the result.
7768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return UCOL_LESS;
7769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
7770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(sOrder == tOrder) { // if we have same CEs, we continue the loop
7772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sOrder = 0; tOrder = 0;
7773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
7774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
7775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // compare current top bytes
7776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(((sOrder^tOrder)&0xFF000000)!=0) {
7777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // top bytes differ, return difference
7778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder < tOrder) {
7779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UCOL_LESS;
7780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else if(sOrder > tOrder) {
7781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UCOL_GREATER;
7782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
7784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // since we must return enum value
7785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // top bytes match, continue with following bytes
7788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sOrder<<=8;
7789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tOrder<<=8;
7790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruendOfPrimLoop:
7794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // after primary loop, we definitely know the sizes of strings,
7795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // so we set it and use simpler loop for secondaries and tertiaries
7796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    sLen = sIndex; tLen = tIndex;
7797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(strength >= UCOL_SECONDARY) {
7798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // adjust the table beggining
7799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        elements += coll->latinOneTableLen;
7800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        endOfSource = FALSE;
7801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if(coll->frenchCollation == UCOL_OFF) { // non French
7803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // This loop is a simplified copy of primary loop
7804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // at this point we know that whole strings are latin-1, so we don't
7805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // check for that. We also know that we only have contractions as
7806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // specials.
7807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sIndex = 0; tIndex = 0;
7808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(sOrder==0) {
7810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(sIndex==sLen) {
7811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        endOfSource = TRUE;
7812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sChar=source[sIndex++];
7815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = elements[sChar];
7816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(sOrder > UCOL_NOT_FOUND) {
7817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, sOrder, source, &sIndex, sLen);
7818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(tOrder==0) {
7822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tIndex==tLen) {
7823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(endOfSource) {
7824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto endOfSecLoop;
7825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_GREATER;
7827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tChar=target[tIndex++];
7830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = elements[tChar];
7831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tOrder > UCOL_NOT_FOUND) {
7832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, tOrder, target, &tIndex, tLen);
7833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(endOfSource) {
7836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UCOL_LESS;
7837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == tOrder) {
7840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = 0; tOrder = 0;
7841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // see primary loop for comments on this
7844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(((sOrder^tOrder)&0xFF000000)!=0) {
7845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(sOrder < tOrder) {
7846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_LESS;
7847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if(sOrder > tOrder) {
7848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_GREATER;
7849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder<<=8;
7852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder<<=8;
7853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else { // French
7856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(haveContractions) { // if we have contractions, we have to bail out
7857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // since we don't really know how to handle them here
785850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
7859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // For French, we go backwards
7861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sIndex = sLen; tIndex = tLen;
7862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            for(;;) {
7863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(sOrder==0) {
7864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(sIndex==0) {
7865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        endOfSource = TRUE;
7866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
7867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sChar=source[--sIndex];
7869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = elements[sChar];
7870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // don't even look for contractions
7871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                while(tOrder==0) {
7874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(tIndex==0) {
7875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(endOfSource) {
7876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            goto endOfSecLoop;
7877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else {
7878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_GREATER;
7879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tChar=target[--tIndex];
7882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = elements[tChar];
7883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // don't even look for contractions
7884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(endOfSource) {
7886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    return UCOL_LESS;
7887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder == tOrder) {
7890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = 0; tOrder = 0;
7891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    continue;
7892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
7893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // see the primary loop for comments
7894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(((sOrder^tOrder)&0xFF000000)!=0) {
7895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if(sOrder < tOrder) {
7896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_LESS;
7897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        } else if(sOrder > tOrder) {
7898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            return UCOL_GREATER;
7899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        }
7900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder<<=8;
7902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder<<=8;
7903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruendOfSecLoop:
7909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(strength >= UCOL_TERTIARY) {
7910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // tertiary loop is the same as secondary (except no French)
7911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        elements += coll->latinOneTableLen;
7912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sIndex = 0; tIndex = 0;
7913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        endOfSource = FALSE;
7914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for(;;) {
7915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while(sOrder==0) {
7916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sIndex==sLen) {
7917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    endOfSource = TRUE;
7918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
7919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sChar=source[sIndex++];
7921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder = elements[sChar];
7922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(sOrder > UCOL_NOT_FOUND) {
7923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sOrder, source, &sIndex, sLen);
7924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            while(tOrder==0) {
7927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tIndex==tLen) {
7928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(endOfSource) {
7929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_EQUAL; // if both strings are at the end, they are equal
7930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else {
7931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_GREATER;
7932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tChar=target[tIndex++];
7935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder = elements[tChar];
7936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(tOrder > UCOL_NOT_FOUND) {
7937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tOrder, target, &tIndex, tLen);
7938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
7940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(endOfSource) {
7941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return UCOL_LESS;
7942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sOrder == tOrder) {
7944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder = 0; tOrder = 0;
7945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                continue;
7946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
7947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if(((sOrder^tOrder)&0xff000000)!=0) {
7948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if(sOrder < tOrder) {
7949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_LESS;
7950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    } else if(sOrder > tOrder) {
7951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return UCOL_GREATER;
7952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
7953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
7954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                sOrder<<=8;
7955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                tOrder<<=8;
7956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
7957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
7958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
7959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return UCOL_EQUAL;
7960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
7961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationResult U_EXPORT2
7964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcollIter( const UCollator    *coll,
7965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 UCharIterator *sIter,
7966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 UCharIterator *tIter,
7967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                 UErrorCode         *status)
7968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
7969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(!status || U_FAILURE(*status)) {
7970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
7971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
7974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
7975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (sIter == tIter) {
7977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
7978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
7979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(sIter == NULL || tIter == NULL || coll == NULL) {
7981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
7982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
7983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
7984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
7985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UCollationResult result = UCOL_EQUAL;
7987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
7988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Preparing the context objects for iterating over strings
7989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    collIterate sColl, tColl;
799050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, NULL, -1, &sColl, status);
799150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IInit_collIterate(coll, NULL, -1, &tColl, status);
799250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*status)) {
799350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
799450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return UCOL_EQUAL;
799550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
7996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // The division for the array length may truncate the array size to
7997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
7998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // for all platforms anyway.
7999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
8000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
8001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UNormIterator *sNormIter = NULL, *tNormIter = NULL;
8002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    sColl.iterator = sIter;
8004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    sColl.flags |= UCOL_USE_ITERATOR;
8005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    tColl.flags |= UCOL_USE_ITERATOR;
8006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    tColl.iterator = tIter;
8007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) {
8009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
8010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status);
8011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        sColl.flags &= ~UCOL_ITER_NORM;
8012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
8014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status);
8015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tColl.flags &= ~UCOL_ITER_NORM;
8016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;
8019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    while((sChar = sColl.iterator->next(sColl.iterator)) ==
8021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        (tChar = tColl.iterator->next(tColl.iterator))) {
8022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if(sChar == U_SENTINEL) {
8023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                result = UCOL_EQUAL;
8024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto end_compare;
8025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(sChar == U_SENTINEL) {
8029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        tChar = tColl.iterator->previous(tColl.iterator);
8030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(tChar == U_SENTINEL) {
8033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sChar = sColl.iterator->previous(sColl.iterator);
8034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    sChar = sColl.iterator->previous(sColl.iterator);
8037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    tChar = tColl.iterator->previous(tColl.iterator);
8038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (ucol_unsafeCP((UChar)sChar, coll) || ucol_unsafeCP((UChar)tChar, coll))
8040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
8041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // We are stopped in the middle of a contraction.
8042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Scan backwards through the == part of the string looking for the start of the contraction.
8043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   It doesn't matter which string we scan, since they are the same in this region.
8044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        do
8045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
8046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sChar = sColl.iterator->previous(sColl.iterator);
8047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            tChar = tColl.iterator->previous(tColl.iterator);
8048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
8049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll));
8050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(U_SUCCESS(*status)) {
8054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        result = ucol_strcollRegular(&sColl, &tColl, status);
8055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruend_compare:
8058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(sNormIter || tNormIter) {
8059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(sNormIter);
8060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        unorm_closeIter(tNormIter);
8061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UTRACE_EXIT_VALUE_STATUS(result, *status)
8064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return result;
8065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                      */
8069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* ucol_strcoll     Main public API string comparison function          */
8070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*                                                                      */
8071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCollationResult U_EXPORT2
8072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_strcoll( const UCollator    *coll,
8073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UChar        *source,
8074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t            sourceLength,
8075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UChar        *target,
8076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru              int32_t            targetLength)
8077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{
8078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ALIGN_CODE(16);
8079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
8081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
8082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
8083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
8084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
8085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(source == NULL || target == NULL) {
8088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // do not crash, but return. Should have
8089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // status argument to return error.
8090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_VALUE(UCOL_EQUAL);
8091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
8092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
8093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* Quick check if source and target are same strings. */
8095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /* They should either both be NULL terminated or the explicit length should be set on both. */
8096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (source==target && sourceLength==targetLength) {
8097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UTRACE_EXIT_VALUE(UCOL_EQUAL);
8098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return UCOL_EQUAL;
8099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Scan the strings.  Find:                                                             */
8102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*    The length of any leading portion that is equal                                   */
8103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*    Whether they are exactly equal.  (in which case we just return)                   */
8104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar    *pSrc    = source;
8105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar    *pTarg   = target;
8106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t        equalLength;
8107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sourceLength == -1 && targetLength == -1) {
8109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Both strings are null terminated.
8110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //    Scan through any leading equal portion.
8111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        while (*pSrc == *pTarg && *pSrc != 0) {
8112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pSrc++;
8113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pTarg++;
8114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (*pSrc == 0 && *pTarg == 0) {
8116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UTRACE_EXIT_VALUE(UCOL_EQUAL);
8117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return UCOL_EQUAL;
8118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
811950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        equalLength = (int32_t)(pSrc - source);
8120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else
8122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
8123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // One or both strings has an explicit length.
8124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar    *pSrcEnd = source + sourceLength;
8125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar    *pTargEnd = target + targetLength;
8126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Scan while the strings are bitwise ==, or until one is exhausted.
8128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for (;;) {
8129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (pSrc == pSrcEnd || pTarg == pTargEnd) {
8130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
8131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLength == -1)) {
8133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
8134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (*pSrc != *pTarg) {
8136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
8137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
8138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            pSrc++;
8139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            pTarg++;
8140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
814150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        equalLength = (int32_t)(pSrc - source);
8142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // If we made it all the way through both strings, we are done.  They are ==
8144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if ((pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0))  &&   /* At end of src string, however it was specified. */
8145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)))     /* and also at end of dest string                  */
8146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        {
8147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UTRACE_EXIT_VALUE(UCOL_EQUAL);
8148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return UCOL_EQUAL;
8149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
8150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (equalLength > 0) {
8152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* There is an identical portion at the beginning of the two strings.        */
8153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*   If the identical portion ends within a contraction or a comibining      */
8154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*   character sequence, back up to the start of that sequence.              */
8155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
8156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // These values should already be set by the code above.
8157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //pSrc  = source + equalLength;        /* point to the first differing chars   */
8158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //pTarg = target + equalLength;
815927f654740f2a26ad62a5c155af9199af9e69b889claireho        if ((pSrc  != source+sourceLength && ucol_unsafeCP(*pSrc, coll)) ||
816027f654740f2a26ad62a5c155af9199af9e69b889claireho            (pTarg != target+targetLength && ucol_unsafeCP(*pTarg, coll)))
8161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
8162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We are stopped in the middle of a contraction.
8163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Scan backwards through the == part of the string looking for the start of the contraction.
8164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   It doesn't matter which string we scan, since they are the same in this region.
8165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do
8166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
8167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                equalLength--;
8168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pSrc--;
8169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
8170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while (equalLength>0 && ucol_unsafeCP(*pSrc, coll));
8171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source += equalLength;
8174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        target += equalLength;
8175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (sourceLength > 0) {
8176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sourceLength -= equalLength;
8177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (targetLength > 0) {
8179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            targetLength -= equalLength;
8180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
8181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
8184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UCollationResult returnVal;
8185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!coll->latinOneUse || (sourceLength > 0 && *source&0xff00) || (targetLength > 0 && *target&0xff00)) {
818650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targetLength, &status);
8187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
8188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);
8189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTRACE_EXIT_VALUE(returnVal);
8191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return returnVal;
8192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */
8195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2
8196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_greater(    const    UCollator        *coll,
8197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar            *source,
8198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t            sourceLength,
8199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const    UChar            *target,
8200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t            targetLength)
8201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
8202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
8203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        == UCOL_GREATER);
8204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */
8207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2
8208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_greaterOrEqual(    const    UCollator    *coll,
8209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const    UChar        *source,
8210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t        sourceLength,
8211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const    UChar        *target,
8212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t        targetLength)
8213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
8214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
8215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        != UCOL_LESS);
8216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* convenience function for comparing strings */
8219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2
8220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_equal(        const    UCollator        *coll,
8221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const    UChar            *source,
8222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t            sourceLength,
8223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const    UChar            *target,
8224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t            targetLength)
8225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
8226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
8227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        == UCOL_EQUAL);
8228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
8231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
8232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(coll && coll->UCA) {
8233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo));
8234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
8235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
8236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */
8238