1/*
2 ******************************************************************************
3 *   Copyright (C) 1996-2009, International Business Machines                 *
4 *   Corporation and others.  All Rights Reserved.                            *
5 ******************************************************************************
6 */
7
8#include "unicode/utypes.h"
9
10#if !UCONFIG_NO_COLLATION
11
12#include "unicode/unistr.h"
13#include "unicode/putil.h"
14#include "unicode/usearch.h"
15
16#include "cmemory.h"
17#include "unicode/coll.h"
18#include "unicode/tblcoll.h"
19#include "unicode/coleitr.h"
20#include "unicode/ucoleitr.h"
21
22#include "unicode/regex.h"        // TODO: make conditional on regexp being built.
23
24#include "unicode/uniset.h"
25#include "unicode/uset.h"
26#include "unicode/ustring.h"
27#include "hash.h"
28#include "uhash.h"
29#include "ucln_in.h"
30#include "ucol_imp.h"
31#include "umutex.h"
32
33#include "unicode/colldata.h"
34
35U_NAMESPACE_BEGIN
36
37#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
38#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
39#define DELETE_ARRAY(array) uprv_free((void *) (array))
40#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
41
42UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CEList)
43
44#ifdef INSTRUMENT_CELIST
45int32_t CEList::_active = 0;
46int32_t CEList::_histogram[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
47#endif
48
49CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
50    : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
51{
52    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
53    UCollationStrength strength = ucol_getStrength(coll);
54    UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==  UCOL_SHIFTED;
55    uint32_t variableTop = ucol_getVariableTop(coll, &status);
56    uint32_t strengthMask = 0;
57    int32_t order;
58
59    if (U_FAILURE(status)) {
60        return;
61    }
62
63    // **** only set flag if string has Han(gul) ****
64    ucol_forceHanImplicit(elems, &status);
65
66    switch (strength)
67    {
68    default:
69        strengthMask |= UCOL_TERTIARYORDERMASK;
70        /* fall through */
71
72    case UCOL_SECONDARY:
73        strengthMask |= UCOL_SECONDARYORDERMASK;
74        /* fall through */
75
76    case UCOL_PRIMARY:
77        strengthMask |= UCOL_PRIMARYORDERMASK;
78    }
79
80#ifdef INSTRUMENT_CELIST
81    _active += 1;
82    _histogram[0] += 1;
83#endif
84
85    ces = ceBuffer;
86
87    while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
88        UBool cont = isContinuation(order);
89
90        order &= strengthMask;
91
92        if (toShift && variableTop > (uint32_t)order && (order & UCOL_PRIMARYORDERMASK) != 0) {
93            if (strength >= UCOL_QUATERNARY) {
94                order &= UCOL_PRIMARYORDERMASK;
95            } else {
96                order = UCOL_IGNORABLE;
97            }
98        }
99
100        if (order == UCOL_IGNORABLE) {
101            continue;
102        }
103
104        if (cont) {
105            order |= UCOL_CONTINUATION_MARKER;
106        }
107
108        add(order, status);
109    }
110
111    ucol_closeElements(elems);
112}
113
114CEList::~CEList()
115{
116#ifdef INSTRUMENT_CELIST
117    _active -= 1;
118#endif
119
120    if (ces != ceBuffer) {
121        DELETE_ARRAY(ces);
122    }
123}
124
125void CEList::add(uint32_t ce, UErrorCode &status)
126{
127    if (U_FAILURE(status)) {
128        return;
129    }
130
131    if (listSize >= listMax) {
132        int32_t newMax = listMax + CELIST_BUFFER_SIZE;
133
134#ifdef INSTRUMENT_CELIST
135        _histogram[listSize / CELIST_BUFFER_SIZE] += 1;
136#endif
137
138        uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax);
139
140        if (newCEs == NULL) {
141            status = U_MEMORY_ALLOCATION_ERROR;
142            return;
143        }
144
145        uprv_memcpy(newCEs, ces, listSize * sizeof(uint32_t));
146
147        if (ces != ceBuffer) {
148            DELETE_ARRAY(ces);
149        }
150
151        ces = newCEs;
152        listMax = newMax;
153    }
154
155    ces[listSize++] = ce;
156}
157
158uint32_t CEList::get(int32_t index) const
159{
160    if (index >= 0 && index < listSize) {
161        return ces[index];
162    }
163
164    return UCOL_NULLORDER;
165}
166
167uint32_t &CEList::operator[](int32_t index) const
168{
169    return ces[index];
170}
171
172UBool CEList::matchesAt(int32_t offset, const CEList *other) const
173{
174    if (other == NULL || listSize - offset < other->size()) {
175        return FALSE;
176    }
177
178    for (int32_t i = offset, j = 0; j < other->size(); i += 1, j += 1) {
179        if (ces[i] != (*other)[j]) {
180            return FALSE;
181        }
182    }
183
184    return TRUE;
185}
186
187int32_t CEList::size() const
188{
189    return listSize;
190}
191
192UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringList)
193
194#ifdef INSTRUMENT_STRING_LIST
195int32_t StringList::_lists = 0;
196int32_t StringList::_strings = 0;
197int32_t StringList::_histogram[101] = {0};
198#endif
199
200StringList::StringList(UErrorCode &status)
201    : strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0)
202{
203    if (U_FAILURE(status)) {
204        return;
205    }
206
207    strings = new UnicodeString [listMax];
208
209    if (strings == NULL) {
210        status = U_MEMORY_ALLOCATION_ERROR;
211        return;
212    }
213
214#ifdef INSTRUMENT_STRING_LIST
215    _lists += 1;
216    _histogram[0] += 1;
217#endif
218}
219
220StringList::~StringList()
221{
222    delete[] strings;
223}
224
225void StringList::add(const UnicodeString *string, UErrorCode &status)
226{
227    if (U_FAILURE(status)) {
228        return;
229    }
230
231#ifdef INSTRUMENT_STRING_LIST
232    _strings += 1;
233#endif
234
235    if (listSize >= listMax) {
236        int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE;
237
238        UnicodeString *newStrings = new UnicodeString[newMax];
239
240        uprv_memcpy(newStrings, strings, listSize * sizeof(UnicodeString));
241
242#ifdef INSTRUMENT_STRING_LIST
243        int32_t _h = listSize / STRING_LIST_BUFFER_SIZE;
244
245        if (_h > 100) {
246            _h = 100;
247        }
248
249        _histogram[_h] += 1;
250#endif
251
252        delete[] strings;
253        strings = newStrings;
254        listMax = newMax;
255    }
256
257    // The ctor initialized all the strings in
258    // the array to empty strings, so this
259    // is the same as copying the source string.
260    strings[listSize++].append(*string);
261}
262
263void StringList::add(const UChar *chars, int32_t count, UErrorCode &status)
264{
265    const UnicodeString string(chars, count);
266
267    add(&string, status);
268}
269
270const UnicodeString *StringList::get(int32_t index) const
271{
272    if (index >= 0 && index < listSize) {
273        return &strings[index];
274    }
275
276    return NULL;
277}
278
279int32_t StringList::size() const
280{
281    return listSize;
282}
283
284
285U_CFUNC void deleteStringList(void *obj);
286
287class CEToStringsMap : public UMemory
288{
289public:
290
291    CEToStringsMap(UErrorCode &status);
292    ~CEToStringsMap();
293
294    void put(uint32_t ce, UnicodeString *string, UErrorCode &status);
295    StringList *getStringList(uint32_t ce) const;
296
297private:
298
299    void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status);
300    UHashtable *map;
301};
302
303CEToStringsMap::CEToStringsMap(UErrorCode &status)
304    : map(NULL)
305{
306    if (U_FAILURE(status)) {
307        return;
308    }
309
310    map = uhash_open(uhash_hashLong, uhash_compareLong,
311                     uhash_compareCaselessUnicodeString,
312                     &status);
313
314    if (U_FAILURE(status)) {
315        return;
316    }
317
318    uhash_setValueDeleter(map, deleteStringList);
319}
320
321CEToStringsMap::~CEToStringsMap()
322{
323    uhash_close(map);
324}
325
326void CEToStringsMap::put(uint32_t ce, UnicodeString *string, UErrorCode &status)
327{
328    StringList *strings = getStringList(ce);
329
330    if (strings == NULL) {
331        strings = new StringList(status);
332
333        if (strings == NULL || U_FAILURE(status)) {
334            status = U_MEMORY_ALLOCATION_ERROR;
335            return;
336        }
337
338        putStringList(ce, strings, status);
339    }
340
341    strings->add(string, status);
342}
343
344StringList *CEToStringsMap::getStringList(uint32_t ce) const
345{
346    return (StringList *) uhash_iget(map, ce);
347}
348
349void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCode &status)
350{
351    uhash_iput(map, ce, (void *) stringList, &status);
352}
353
354U_CFUNC void deleteStringList(void *obj)
355{
356    StringList *strings = (StringList *) obj;
357
358    delete strings;
359}
360
361U_CFUNC void deleteCEList(void *obj);
362U_CFUNC void deleteUnicodeStringKey(void *obj);
363
364class StringToCEsMap : public UMemory
365{
366public:
367    StringToCEsMap(UErrorCode &status);
368    ~StringToCEsMap();
369
370    void put(const UnicodeString *string, const CEList *ces, UErrorCode &status);
371    const CEList *get(const UnicodeString *string);
372    void free(const CEList *list);
373
374private:
375
376
377    UHashtable *map;
378};
379
380StringToCEsMap::StringToCEsMap(UErrorCode &status)
381    : map(NULL)
382{
383    if (U_FAILURE(status)) {
384        return;
385    }
386
387    map = uhash_open(uhash_hashUnicodeString,
388                     uhash_compareUnicodeString,
389                     uhash_compareLong,
390                     &status);
391
392    if (U_FAILURE(status)) {
393        return;
394    }
395
396    uhash_setValueDeleter(map, deleteCEList);
397    uhash_setKeyDeleter(map, deleteUnicodeStringKey);
398}
399
400StringToCEsMap::~StringToCEsMap()
401{
402    uhash_close(map);
403}
404
405void StringToCEsMap::put(const UnicodeString *string, const CEList *ces, UErrorCode &status)
406{
407    uhash_put(map, (void *) string, (void *) ces, &status);
408}
409
410const CEList *StringToCEsMap::get(const UnicodeString *string)
411{
412    return (const CEList *) uhash_get(map, string);
413}
414
415U_CFUNC void deleteCEList(void *obj)
416{
417    CEList *list = (CEList *) obj;
418
419    delete list;
420}
421
422U_CFUNC void deleteUnicodeStringKey(void *obj)
423{
424    UnicodeString *key = (UnicodeString *) obj;
425
426    delete key;
427}
428
429class CollDataCacheEntry : public UMemory
430{
431public:
432    CollDataCacheEntry(CollData *theData);
433    ~CollDataCacheEntry();
434
435    CollData *data;
436    int32_t   refCount;
437};
438
439CollDataCacheEntry::CollDataCacheEntry(CollData *theData)
440    : data(theData), refCount(1)
441{
442    // nothing else to do
443}
444
445CollDataCacheEntry::~CollDataCacheEntry()
446{
447    // check refCount?
448    delete data;
449}
450
451class CollDataCache : public UMemory
452{
453public:
454    CollDataCache(UErrorCode &status);
455    ~CollDataCache();
456
457    CollData *get(UCollator *collator, UErrorCode &status);
458    void unref(CollData *collData);
459
460    void flush();
461
462private:
463    static char *getKey(UCollator *collator, char *keyBuffer, int32_t *charBufferLength);
464    static void deleteKey(char *key);
465
466    UMTX lock;
467    UHashtable *cache;
468};
469
470U_CFUNC void deleteChars(void */*obj*/)
471{
472    // char *chars = (char *) obj;
473    // All the key strings are owned by the
474    // CollData objects and don't need to
475    // be freed here.
476  //DELETE_ARRAY(chars);
477}
478
479U_CFUNC void deleteCollDataCacheEntry(void *obj)
480{
481    CollDataCacheEntry *entry = (CollDataCacheEntry *) obj;
482
483    delete entry;
484}
485
486CollDataCache::CollDataCache(UErrorCode &status)
487    : lock(0), cache(NULL)
488{
489    if (U_FAILURE(status)) {
490        return;
491    }
492
493    umtx_init(&lock);
494
495    cache = uhash_open(uhash_hashChars, uhash_compareChars, uhash_compareLong, &status);
496
497    if (U_FAILURE(status)) {
498        return;
499    }
500
501    uhash_setValueDeleter(cache, deleteCollDataCacheEntry);
502    uhash_setKeyDeleter(cache, deleteChars);
503}
504
505CollDataCache::~CollDataCache()
506{
507    umtx_lock(&lock);
508    uhash_close(cache);
509    cache = NULL;
510    umtx_unlock(&lock);
511
512    umtx_destroy(&lock);
513}
514
515CollData *CollDataCache::get(UCollator *collator, UErrorCode &status)
516{
517    char keyBuffer[KEY_BUFFER_SIZE];
518    int32_t keyLength = KEY_BUFFER_SIZE;
519    char *key = getKey(collator, keyBuffer, &keyLength);
520    CollData *result = NULL, *newData = NULL;
521    CollDataCacheEntry *entry = NULL, *newEntry = NULL;
522
523    umtx_lock(&lock);
524    entry = (CollDataCacheEntry *) uhash_get(cache, key);
525
526    if (entry == NULL) {
527        umtx_unlock(&lock);
528
529        newData = new CollData(collator, key, keyLength, status);
530        newEntry = new CollDataCacheEntry(newData);
531
532        if (U_FAILURE(status) || newData == NULL || newEntry == NULL) {
533            status = U_MEMORY_ALLOCATION_ERROR;
534            return NULL;
535        }
536
537        umtx_lock(&lock);
538        entry = (CollDataCacheEntry *) uhash_get(cache, key);
539
540        if (entry == NULL) {
541            uhash_put(cache, newData->key, newEntry, &status);
542            umtx_unlock(&lock);
543
544            if (U_FAILURE(status)) {
545                delete newEntry;
546                delete newData;
547
548                return NULL;
549            }
550
551            return newData;
552        }
553    }
554
555    result = entry->data;
556    entry->refCount += 1;
557    umtx_unlock(&lock);
558
559    if (key != keyBuffer) {
560        deleteKey(key);
561    }
562
563    if (newEntry != NULL) {
564        delete newEntry;
565        delete newData;
566    }
567
568    return result;
569}
570
571void CollDataCache::unref(CollData *collData)
572{
573    CollDataCacheEntry *entry = NULL;
574
575    umtx_lock(&lock);
576    entry = (CollDataCacheEntry *) uhash_get(cache, collData->key);
577
578    if (entry != NULL) {
579        entry->refCount -= 1;
580    }
581    umtx_unlock(&lock);
582}
583
584char *CollDataCache::getKey(UCollator *collator, char *keyBuffer, int32_t *keyBufferLength)
585{
586    UErrorCode status = U_ZERO_ERROR;
587    int32_t len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status);
588
589    if (len >= *keyBufferLength) {
590        *keyBufferLength = (len + 2) & ~1;  // round to even length, leaving room for terminating null
591        keyBuffer = NEW_ARRAY(char, *keyBufferLength);
592        status = U_ZERO_ERROR;
593
594        len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status);
595    }
596
597    keyBuffer[len] = '\0';
598
599    return keyBuffer;
600}
601
602void CollDataCache::flush()
603{
604    const UHashElement *element;
605    int32_t pos = -1;
606
607    umtx_lock(&lock);
608    while ((element = uhash_nextElement(cache, &pos)) != NULL) {
609        CollDataCacheEntry *entry = (CollDataCacheEntry *) element->value.pointer;
610
611        if (entry->refCount <= 0) {
612            uhash_removeElement(cache, element);
613        }
614    }
615    umtx_unlock(&lock);
616}
617
618void CollDataCache::deleteKey(char *key)
619{
620    DELETE_ARRAY(key);
621}
622
623U_CDECL_BEGIN
624static UBool coll_data_cleanup(void) {
625    CollData::freeCollDataCache();
626  return TRUE;
627}
628U_CDECL_END
629
630UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollData)
631
632CollData::CollData()
633{
634    // nothing
635}
636
637#define CLONE_COLLATOR
638
639//#define CACHE_CELISTS
640CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, UErrorCode &status)
641    : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL)
642{
643    // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]]
644    // i.e. other, control, private use, format, surrogate
645    U_STRING_DECL(test_pattern, "[[:assigned:]-[:c:]]", 20);
646    U_STRING_INIT(test_pattern, "[[:assigned:]-[:c:]]", 20);
647    USet *charsToTest = uset_openPattern(test_pattern, 20, &status);
648
649    // Han ext. A, Han, Jamo, Hangul, Han Ext. B
650    // i.e. all the characers we handle implicitly
651    U_STRING_DECL(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
652    U_STRING_INIT(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
653    USet *charsToRemove = uset_openPattern(remove_pattern, 70, &status);
654
655    if (U_FAILURE(status)) {
656        return;
657    }
658
659    USet *expansions   = uset_openEmpty();
660    USet *contractions = uset_openEmpty();
661    int32_t itemCount;
662
663#ifdef CACHE_CELISTS
664    charsToCEList = new StringToCEsMap(status);
665
666    if (U_FAILURE(status)) {
667        goto bail;
668    }
669#else
670    charsToCEList = NULL;
671#endif
672
673    ceToCharsStartingWith = new CEToStringsMap(status);
674
675    if (U_FAILURE(status)) {
676        goto bail;
677    }
678
679    if (cacheKeyLength > KEY_BUFFER_SIZE) {
680        key = NEW_ARRAY(char, cacheKeyLength);
681
682        if (key == NULL) {
683            status = U_MEMORY_ALLOCATION_ERROR;
684            goto bail;
685        }
686    } else {
687        key = keyBuffer;
688    }
689
690    ARRAY_COPY(key, cacheKey, cacheKeyLength);
691
692#ifdef CLONE_COLLATOR
693    coll = ucol_safeClone(collator, NULL, NULL, &status);
694
695    if (U_FAILURE(status)) {
696        goto bail;
697    }
698#else
699    coll = collator;
700#endif
701
702    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
703
704    uset_addAll(charsToTest, contractions);
705    uset_addAll(charsToTest, expansions);
706    uset_removeAll(charsToTest, charsToRemove);
707
708    itemCount = uset_getItemCount(charsToTest);
709    for(int32_t item = 0; item < itemCount; item += 1) {
710        UChar32 start = 0, end = 0;
711        UChar buffer[16];
712        int32_t len = uset_getItem(charsToTest, item, &start, &end,
713                                   buffer, 16, &status);
714
715        if (len == 0) {
716            for (UChar32 ch = start; ch <= end; ch += 1) {
717                UnicodeString *st = new UnicodeString(ch);
718
719                if (st == NULL) {
720                    status = U_MEMORY_ALLOCATION_ERROR;
721                    break;
722                }
723
724                CEList *ceList = new CEList(coll, *st, status);
725
726                ceToCharsStartingWith->put(ceList->get(0), st, status);
727
728#ifdef CACHE_CELISTS
729                charsToCEList->put(st, ceList, status);
730#else
731                delete ceList;
732                delete st;
733#endif
734            }
735        } else if (len > 0) {
736            UnicodeString *st = new UnicodeString(buffer, len);
737
738            if (st == NULL) {
739                status = U_MEMORY_ALLOCATION_ERROR;
740                break;
741            }
742
743            CEList *ceList = new CEList(coll, *st, status);
744
745            ceToCharsStartingWith->put(ceList->get(0), st, status);
746
747#ifdef CACHE_CELISTS
748            charsToCEList->put(st, ceList, status);
749#else
750            delete ceList;
751            delete st;
752#endif
753        } else {
754            // shouldn't happen...
755        }
756
757        if (U_FAILURE(status)) {
758             break;
759        }
760    }
761
762bail:
763    uset_close(contractions);
764    uset_close(expansions);
765    uset_close(charsToRemove);
766    uset_close(charsToTest);
767
768    if (U_FAILURE(status)) {
769        return;
770    }
771
772     UChar32 hanRanges[] = {UCOL_FIRST_HAN, UCOL_LAST_HAN, UCOL_FIRST_HAN_COMPAT, UCOL_LAST_HAN_COMPAT, UCOL_FIRST_HAN_A, UCOL_LAST_HAN_A,
773                            UCOL_FIRST_HAN_B, UCOL_LAST_HAN_B};
774     UChar  jamoRanges[] = {UCOL_FIRST_L_JAMO, UCOL_FIRST_V_JAMO, UCOL_FIRST_T_JAMO, UCOL_LAST_T_JAMO};
775     UnicodeString hanString = UnicodeString::fromUTF32(hanRanges, ARRAY_SIZE(hanRanges));
776     UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges));
777     CEList hanList(coll, hanString, status);
778     CEList jamoList(coll, jamoString, status);
779     int32_t j = 0;
780
781     if (U_FAILURE(status)) {
782         return;
783     }
784
785     for (int32_t c = 0; c < jamoList.size(); c += 1) {
786         uint32_t jce = jamoList[c];
787
788         if (! isContinuation(jce)) {
789             jamoLimits[j++] = jce;
790         }
791     }
792
793     jamoLimits[3] += (1 << UCOL_PRIMARYORDERSHIFT);
794
795     minHan = 0xFFFFFFFF;
796     maxHan = 0;
797
798     for(int32_t h = 0; h < hanList.size(); h += 2) {
799         uint32_t han = (uint32_t) hanList[h];
800
801         if (han < minHan) {
802             minHan = han;
803         }
804
805         if (han > maxHan) {
806             maxHan = han;
807         }
808     }
809
810     maxHan += (1 << UCOL_PRIMARYORDERSHIFT);
811}
812
813CollData::~CollData()
814{
815#ifdef CLONE_COLLATOR
816   ucol_close(coll);
817#endif
818
819   if (key != keyBuffer) {
820       DELETE_ARRAY(key);
821   }
822
823   delete ceToCharsStartingWith;
824
825#ifdef CACHE_CELISTS
826   delete charsToCEList;
827#endif
828}
829
830UCollator *CollData::getCollator() const
831{
832    return coll;
833}
834
835const StringList *CollData::getStringList(int32_t ce) const
836{
837    return ceToCharsStartingWith->getStringList(ce);
838}
839
840const CEList *CollData::getCEList(const UnicodeString *string) const
841{
842#ifdef CACHE_CELISTS
843    return charsToCEList->get(string);
844#else
845    UErrorCode status = U_ZERO_ERROR;
846    const CEList *list = new CEList(coll, *string, status);
847
848    if (U_FAILURE(status)) {
849        delete list;
850        list = NULL;
851    }
852
853    return list;
854#endif
855}
856
857void CollData::freeCEList(const CEList *list)
858{
859#ifndef CACHE_CELISTS
860    delete list;
861#endif
862}
863
864int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const
865{
866    // find out shortest string for the longest sequence of ces.
867    // this can probably be folded with the minLengthCache...
868
869    if (history[offset] >= 0) {
870        return history[offset];
871    }
872
873    uint32_t ce = ceList->get(offset);
874    int32_t maxOffset = ceList->size();
875    int32_t shortestLength = INT32_MAX;
876    const StringList *strings = ceToCharsStartingWith->getStringList(ce);
877
878    if (strings != NULL) {
879        int32_t stringCount = strings->size();
880
881        for (int32_t s = 0; s < stringCount; s += 1) {
882            const UnicodeString *string = strings->get(s);
883#ifdef CACHE_CELISTS
884            const CEList *ceList2 = charsToCEList->get(string);
885#else
886            UErrorCode status = U_ZERO_ERROR;
887            const CEList *ceList2 = new CEList(coll, *string, status);
888
889            if (U_FAILURE(status)) {
890                delete ceList2;
891                ceList2 = NULL;
892            }
893#endif
894
895            if (ceList->matchesAt(offset, ceList2)) {
896                int32_t clength = ceList2->size();
897                int32_t slength = string->length();
898                int32_t roffset = offset + clength;
899                int32_t rlength = 0;
900
901                if (roffset < maxOffset) {
902                    rlength = minLengthInChars(ceList, roffset, history);
903
904                    if (rlength <= 0) {
905                    // delete before continue to avoid memory leak.
906#ifndef CACHE_CELISTS
907                        delete ceList2;
908#endif
909                        // ignore any dead ends
910                        continue;
911                    }
912                }
913
914                if (shortestLength > slength + rlength) {
915                    shortestLength = slength + rlength;
916                }
917            }
918
919#ifndef CACHE_CELISTS
920            delete ceList2;
921#endif
922        }
923    }
924
925    if (shortestLength == INT32_MAX) {
926        // No matching strings at this offset. See if
927        // the CE is in a range we can handle manually.
928        if (ce >= minHan && ce < maxHan) {
929            // all han have implicit orders which
930            // generate two CEs.
931            int32_t roffset = offset + 2;
932            int32_t rlength = 0;
933
934          //history[roffset++] = -1;
935          //history[roffset++] = 1;
936
937            if (roffset < maxOffset) {
938                rlength = minLengthInChars(ceList, roffset, history);
939            }
940
941            if (rlength < 0) {
942                return -1;
943            }
944
945            shortestLength = 1 + rlength;
946            goto have_shortest;
947        } else if (ce >= jamoLimits[0] && ce < jamoLimits[3]) {
948            int32_t roffset = offset;
949            int32_t rlength = 0;
950
951            // **** this loop may not handle archaic Hangul correctly ****
952            for (int32_t j = 0; roffset < maxOffset && j < 4; j += 1, roffset += 1) {
953                uint32_t jce = ceList->get(roffset);
954
955                // Some Jamo have 24-bit primary order; skip the
956                // 2nd CE. This should always be OK because if
957                // we're still in the loop all we've seen are
958                // a series of Jamo in LVT order.
959                if (isContinuation(jce)) {
960                    continue;
961                }
962
963                if (j >= 3 || jce < jamoLimits[j] || jce >= jamoLimits[j + 1]) {
964                    break;
965                }
966            }
967
968            if (roffset == offset) {
969                // we started with a non-L Jamo...
970                // just say it comes from a single character
971                roffset += 1;
972
973                // See if the single Jamo has a 24-bit order.
974                if (roffset < maxOffset && isContinuation(ceList->get(roffset))) {
975                    roffset += 1;
976                }
977            }
978
979            if (roffset < maxOffset) {
980                rlength = minLengthInChars(ceList, roffset, history);
981            }
982
983            if (rlength < 0) {
984                return -1;
985            }
986
987            shortestLength = 1 + rlength;
988            goto have_shortest;
989        }
990
991        // Can't handle it manually either. Just move on.
992        return -1;
993    }
994
995have_shortest:
996    history[offset] = shortestLength;
997
998    return shortestLength;
999}
1000
1001int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const
1002{
1003    int32_t clength = ceList->size();
1004    int32_t *history = NEW_ARRAY(int32_t, clength);
1005
1006    for (int32_t i = 0; i < clength; i += 1) {
1007        history[i] = -1;
1008    }
1009
1010    int32_t minLength = minLengthInChars(ceList, offset, history);
1011
1012    DELETE_ARRAY(history);
1013
1014    return minLength;
1015}
1016
1017CollData *CollData::open(UCollator *collator, UErrorCode &status)
1018{
1019    if (U_FAILURE(status)) {
1020        return NULL;
1021    }
1022
1023    CollDataCache *cache = getCollDataCache();
1024
1025    return cache->get(collator, status);
1026}
1027
1028void CollData::close(CollData *collData)
1029{
1030    CollDataCache *cache = getCollDataCache();
1031
1032    cache->unref(collData);
1033}
1034
1035CollDataCache *CollData::collDataCache = NULL;
1036
1037CollDataCache *CollData::getCollDataCache()
1038{
1039    UErrorCode status = U_ZERO_ERROR;
1040    CollDataCache *cache = NULL;
1041
1042    UMTX_CHECK(NULL, collDataCache, cache);
1043
1044    if (cache == NULL) {
1045        cache = new CollDataCache(status);
1046
1047        if (U_FAILURE(status)) {
1048            delete cache;
1049            return NULL;
1050        }
1051
1052        umtx_lock(NULL);
1053        if (collDataCache == NULL) {
1054            collDataCache = cache;
1055
1056            ucln_i18n_registerCleanup(UCLN_I18N_COLL_DATA, coll_data_cleanup);
1057        }
1058        umtx_unlock(NULL);
1059
1060        if (collDataCache != cache) {
1061            delete cache;
1062        }
1063    }
1064
1065    return collDataCache;
1066}
1067
1068void CollData::freeCollDataCache()
1069{
1070    CollDataCache *cache = NULL;
1071
1072    UMTX_CHECK(NULL, collDataCache, cache);
1073
1074    if (cache != NULL) {
1075        umtx_lock(NULL);
1076        if (collDataCache != NULL) {
1077            collDataCache = NULL;
1078        } else {
1079            cache = NULL;
1080        }
1081        umtx_unlock(NULL);
1082
1083        delete cache;
1084    }
1085}
1086
1087void CollData::flushCollDataCache()
1088{
1089    CollDataCache *cache = NULL;
1090
1091    UMTX_CHECK(NULL, collDataCache, cache);
1092
1093    // **** this will fail if the another ****
1094    // **** thread deletes the cache here ****
1095    if (cache != NULL) {
1096        cache->flush();
1097    }
1098}
1099
1100U_NAMESPACE_END
1101
1102#endif // #if !UCONFIG_NO_COLLATION
1103