ucol_res.cpp revision b13da9df870a61b11249bf741347908dbea0edd8
1/*
2*******************************************************************************
3*   Copyright (C) 1996-2007, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  ucol_res.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11* Description:
12* This file contains dependencies that the collation run-time doesn't normally
13* need. This mainly contains resource bundle usage and collation meta information
14*
15* Modification history
16* Date        Name      Comments
17* 1996-1999   various members of ICU team maintained C API for collation framework
18* 02/16/2001  synwee    Added internal method getPrevSpecialCE
19* 03/01/2001  synwee    Added maxexpansion functionality.
20* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
21* 12/08/2004  grhoten   Split part of ucol.cpp into ucol_res.cpp
22*/
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_COLLATION
27#include "unicode/uloc.h"
28#include "unicode/coll.h"
29#include "unicode/tblcoll.h"
30#include "unicode/caniter.h"
31#include "unicode/ustring.h"
32
33#include "ucol_bld.h"
34#include "ucol_imp.h"
35#include "ucol_tok.h"
36#include "ucol_elm.h"
37#include "uresimp.h"
38#include "ustr_imp.h"
39#include "cstring.h"
40#include "umutex.h"
41#include "ustrenum.h"
42#include "putilimp.h"
43#include "utracimp.h"
44#include "cmemory.h"
45
46U_NAMESPACE_USE
47
48U_CDECL_BEGIN
49static void U_CALLCONV
50ucol_prv_closeResources(UCollator *coll) {
51    if(coll->rb != NULL) { /* pointing to read-only memory */
52        ures_close(coll->rb);
53    }
54    if(coll->elements != NULL) {
55        ures_close(coll->elements);
56    }
57}
58U_CDECL_END
59
60/****************************************************************************/
61/* Following are the open/close functions                                   */
62/*                                                                          */
63/****************************************************************************/
64static UCollator*
65tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
66    int32_t rulesLen = 0;
67    const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
68    return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
69
70}
71
72
73// API in ucol_imp.h
74
75U_CFUNC UCollator*
76ucol_open_internal(const char *loc,
77                   UErrorCode *status)
78{
79    const UCollator* UCA = ucol_initUCA(status);
80
81    /* New version */
82    if(U_FAILURE(*status)) return 0;
83
84
85
86    UCollator *result = NULL;
87    UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
88
89    /* we try to find stuff from keyword */
90    UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
91    UResourceBundle *collElem = NULL;
92    char keyBuffer[256];
93    // if there is a keyword, we pick it up and try to get elements
94    if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
95        // no keyword. we try to find the default setting, which will give us the keyword value
96        UErrorCode intStatus = U_ZERO_ERROR;
97        // finding default value does not affect collation fallback status
98        UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
99        if(U_SUCCESS(intStatus)) {
100            int32_t defaultKeyLen = 0;
101            const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
102            u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
103            keyBuffer[defaultKeyLen] = 0;
104        } else {
105            *status = U_INTERNAL_PROGRAM_ERROR;
106            return NULL;
107        }
108        ures_close(defaultColl);
109    }
110    collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
111
112    UResourceBundle *binary = NULL;
113
114    if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
115        *status = U_USING_DEFAULT_WARNING;
116        result = ucol_initCollator(UCA->image, result, UCA, status);
117        // if we use UCA, real locale is root
118        result->rb = ures_open(U_ICUDATA_COLL, "", status);
119        result->elements = ures_open(U_ICUDATA_COLL, "", status);
120        if(U_FAILURE(*status)) {
121            goto clean;
122        }
123        ures_close(b);
124        result->hasRealData = FALSE;
125    } else if(U_SUCCESS(*status)) {
126        int32_t len = 0;
127        UErrorCode binaryStatus = U_ZERO_ERROR;
128
129        binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
130
131        if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
132            binary = NULL;
133            result = tryOpeningFromRules(collElem, status);
134            if(U_FAILURE(*status)) {
135                goto clean;
136            }
137        } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
138            const uint8_t *inData = ures_getBinary(binary, &len, status);
139            UCATableHeader *colData = (UCATableHeader *)inData;
140            if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
141                uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
142                colData->version[0] != UCOL_BUILDER_VERSION)
143            {
144                *status = U_DIFFERENT_UCA_VERSION;
145                result = tryOpeningFromRules(collElem, status);
146            } else {
147                if(U_FAILURE(*status)){
148                    goto clean;
149                }
150                if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
151                    result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
152                    if(U_FAILURE(*status)){
153                        goto clean;
154                    }
155                    result->hasRealData = TRUE;
156                } else {
157                    result = ucol_initCollator(UCA->image, result, UCA, status);
158                    ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
159                    if(U_FAILURE(*status)){
160                        goto clean;
161                    }
162                    result->hasRealData = FALSE;
163                }
164                result->freeImageOnClose = FALSE;
165            }
166        }
167        result->rb = b;
168        result->elements = collElem;
169        len = 0;
170        binaryStatus = U_ZERO_ERROR;
171        result->rules = ures_getStringByKey(result->elements, "Sequence", &len, &binaryStatus);
172        result->rulesLength = len;
173        result->freeRulesOnClose = FALSE;
174    } else { /* There is another error, and we're just gonna clean up */
175        goto clean;
176    }
177
178    result->validLocale = NULL; // default is to use rb info
179
180    if(loc == NULL) {
181        loc = ures_getLocale(result->rb, status);
182    }
183    result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
184    /* test for NULL */
185    if (result->requestedLocale == NULL) {
186        *status = U_MEMORY_ALLOCATION_ERROR;
187        goto clean;
188    }
189    uprv_strcpy(result->requestedLocale, loc);
190
191    ures_close(binary);
192    ures_close(collations); //??? we have to decide on that. Probably affects something :)
193    result->resCleaner = ucol_prv_closeResources;
194    return result;
195
196clean:
197    ures_close(b);
198    ures_close(collElem);
199    ures_close(collations);
200    ures_close(binary);
201    return NULL;
202}
203
204U_CAPI UCollator*
205ucol_open(const char *loc,
206          UErrorCode *status)
207{
208    U_NAMESPACE_USE
209
210    UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
211    UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
212    UCollator *result = NULL;
213
214    u_init(status);
215#if !UCONFIG_NO_SERVICE
216    result = Collator::createUCollator(loc, status);
217    if (result == NULL)
218#endif
219    {
220        result = ucol_open_internal(loc, status);
221    }
222    UTRACE_EXIT_PTR_STATUS(result, *status);
223    return result;
224}
225
226U_CAPI UCollator* U_EXPORT2
227ucol_openRules( const UChar        *rules,
228               int32_t            rulesLength,
229               UColAttributeValue normalizationMode,
230               UCollationStrength strength,
231               UParseError        *parseError,
232               UErrorCode         *status)
233{
234    UColTokenParser src;
235    UColAttributeValue norm;
236    UParseError tErr;
237
238    if(status == NULL || U_FAILURE(*status)){
239        return 0;
240    }
241
242    u_init(status);
243    if (U_FAILURE(*status)) {
244        return NULL;
245    }
246
247    if(rules == NULL || rulesLength < -1) {
248        *status = U_ILLEGAL_ARGUMENT_ERROR;
249        return 0;
250    }
251
252    if(rulesLength == -1) {
253        rulesLength = u_strlen(rules);
254    }
255
256    if(parseError == NULL){
257        parseError = &tErr;
258    }
259
260    switch(normalizationMode) {
261    case UCOL_OFF:
262    case UCOL_ON:
263    case UCOL_DEFAULT:
264        norm = normalizationMode;
265        break;
266    default:
267        *status = U_ILLEGAL_ARGUMENT_ERROR;
268        return 0;
269    }
270
271    UCollator *UCA = ucol_initUCA(status);
272
273    if(U_FAILURE(*status)){
274        return NULL;
275    }
276
277    ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status);
278    ucol_tok_assembleTokenList(&src,parseError, status);
279
280    if(U_FAILURE(*status)) {
281        /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
282        /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
283        /* so something might be done here... or on lower level */
284#ifdef UCOL_DEBUG
285        if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
286            fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source);
287        } else {
288            fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
289        }
290#endif
291        ucol_tok_closeTokenList(&src);
292        return NULL;
293    }
294    UCollator *result = NULL;
295    UCATableHeader *table = NULL;
296
297    if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
298        /* also, if we wanted to remove some contractions, we should make a tailoring */
299        table = ucol_assembleTailoringTable(&src, status);
300        if(U_SUCCESS(*status)) {
301            // builder version
302            table->version[0] = UCOL_BUILDER_VERSION;
303            // no tailoring information on this level
304            table->version[1] = table->version[2] = table->version[3] = 0;
305            // set UCD version
306            u_getUnicodeVersion(table->UCDVersion);
307            // set UCA version
308            uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
309            result = ucol_initCollator(table, 0, UCA, status);
310            result->hasRealData = TRUE;
311            result->freeImageOnClose = TRUE;
312        }
313    } else { /* no rules, but no error either */
314        // must be only options
315        // We will init the collator from UCA
316        result = ucol_initCollator(UCA->image, 0, UCA, status);
317        // And set only the options
318        UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
319        /* test for NULL */
320        if (opts == NULL) {
321            *status = U_MEMORY_ALLOCATION_ERROR;
322            goto cleanup;
323        }
324        uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
325        ucol_setOptionsFromHeader(result, opts, status);
326        result->freeOptionsOnClose = TRUE;
327        result->hasRealData = FALSE;
328        result->freeImageOnClose = FALSE;
329    }
330
331    if(U_SUCCESS(*status)) {
332        UChar *newRules;
333        result->dataVersion[0] = UCOL_BUILDER_VERSION;
334        if(rulesLength > 0) {
335            newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
336            /* test for NULL */
337            if (newRules == NULL) {
338                *status = U_MEMORY_ALLOCATION_ERROR;
339                goto cleanup;
340            }
341            uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR);
342            newRules[rulesLength]=0;
343            result->rules = newRules;
344            result->rulesLength = rulesLength;
345            result->freeRulesOnClose = TRUE;
346        }
347        result->rb = NULL;
348        result->elements = NULL;
349        result->validLocale = NULL;
350        result->requestedLocale = NULL;
351        ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
352        ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
353    } else {
354cleanup:
355        if(result != NULL) {
356            ucol_close(result);
357        } else {
358            if(table != NULL) {
359                uprv_free(table);
360            }
361        }
362        result = NULL;
363    }
364
365    ucol_tok_closeTokenList(&src);
366
367    return result;
368}
369
370U_CAPI int32_t U_EXPORT2
371ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
372    UErrorCode status = U_ZERO_ERROR;
373    int32_t len = 0;
374    int32_t UCAlen = 0;
375    const UChar* ucaRules = 0;
376    const UChar *rules = ucol_getRules(coll, &len);
377    if(delta == UCOL_FULL_RULES) {
378        /* take the UCA rules and append real rules at the end */
379        /* UCA rules will be probably coming from the root RB */
380        ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
381        /*
382        UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
383        UResourceBundle*  uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
384        ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
385        ures_close(uca);
386        ures_close(cresb);
387        */
388    }
389    if(U_FAILURE(status)) {
390        return 0;
391    }
392    if(buffer!=0 && bufferLen>0){
393        *buffer=0;
394        if(UCAlen > 0) {
395            u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
396        }
397        if(len > 0 && bufferLen > UCAlen) {
398            u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
399        }
400    }
401    return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
402}
403
404static const UChar _NUL = 0;
405
406U_CAPI const UChar* U_EXPORT2
407ucol_getRules(    const    UCollator       *coll,
408              int32_t            *length)
409{
410    if(coll->rules != NULL) {
411        *length = coll->rulesLength;
412        return coll->rules;
413    }
414    else {
415        *length = 0;
416        return &_NUL;
417    }
418}
419
420U_CAPI UBool U_EXPORT2
421ucol_equals(const UCollator *source, const UCollator *target) {
422    UErrorCode status = U_ZERO_ERROR;
423    // if pointers are equal, collators are equal
424    if(source == target) {
425        return TRUE;
426    }
427    int32_t i = 0, j = 0;
428    // if any of attributes are different, collators are not equal
429    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
430        if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
431            return FALSE;
432        }
433    }
434
435    int32_t sourceRulesLen = 0, targetRulesLen = 0;
436    const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
437    const UChar *targetRules = ucol_getRules(target, &targetRulesLen);
438
439    if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
440        // all the attributes are equal and the rules are equal - collators are equal
441        return(TRUE);
442    }
443    // hard part, need to construct tree from rules and see if they yield the same tailoring
444    UBool result = TRUE;
445    UParseError parseError;
446    UColTokenParser sourceParser, targetParser;
447    int32_t sourceListLen = 0, targetListLen = 0;
448    ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
449    ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
450    sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
451    targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
452
453    if(sourceListLen != targetListLen) {
454        // different number of resets
455        result = FALSE;
456    } else {
457        UColToken *sourceReset = NULL, *targetReset = NULL;
458        UChar *sourceResetString = NULL, *targetResetString = NULL;
459        int32_t sourceStringLen = 0, targetStringLen = 0;
460        for(i = 0; i < sourceListLen; i++) {
461            sourceReset = sourceParser.lh[i].reset;
462            sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
463            sourceStringLen = sourceReset->source >> 24;
464            for(j = 0; j < sourceListLen; j++) {
465                targetReset = targetParser.lh[j].reset;
466                targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
467                targetStringLen = targetReset->source >> 24;
468                if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
469                    sourceReset = sourceParser.lh[i].first;
470                    targetReset = targetParser.lh[j].first;
471                    while(sourceReset != NULL && targetReset != NULL) {
472                        sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
473                        sourceStringLen = sourceReset->source >> 24;
474                        targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
475                        targetStringLen = targetReset->source >> 24;
476                        if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
477                            result = FALSE;
478                            goto returnResult;
479                        }
480                        // probably also need to check the expansions
481                        if(sourceReset->expansion) {
482                            if(!targetReset->expansion) {
483                                result = FALSE;
484                                goto returnResult;
485                            } else {
486                                // compare expansions
487                                sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
488                                sourceStringLen = sourceReset->expansion >> 24;
489                                targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
490                                targetStringLen = targetReset->expansion >> 24;
491                                if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
492                                    result = FALSE;
493                                    goto returnResult;
494                                }
495                            }
496                        } else {
497                            if(targetReset->expansion) {
498                                result = FALSE;
499                                goto returnResult;
500                            }
501                        }
502                        sourceReset = sourceReset->next;
503                        targetReset = targetReset->next;
504                    }
505                    if(sourceReset != targetReset) { // at least one is not NULL
506                        // there are more tailored elements in one list
507                        result = FALSE;
508                        goto returnResult;
509                    }
510
511
512                    break;
513                }
514            }
515            // couldn't find the reset anchor, so the collators are not equal
516            if(j == sourceListLen) {
517                result = FALSE;
518                goto returnResult;
519            }
520        }
521    }
522
523returnResult:
524    ucol_tok_closeTokenList(&sourceParser);
525    ucol_tok_closeTokenList(&targetParser);
526    return result;
527
528}
529
530U_CAPI int32_t U_EXPORT2
531ucol_getDisplayName(    const    char        *objLoc,
532                    const    char        *dispLoc,
533                    UChar             *result,
534                    int32_t         resultLength,
535                    UErrorCode        *status)
536{
537    U_NAMESPACE_USE
538
539    if(U_FAILURE(*status)) return -1;
540    UnicodeString dst;
541    if(!(result==NULL && resultLength==0)) {
542        // NULL destination for pure preflighting: empty dummy string
543        // otherwise, alias the destination buffer
544        dst.setTo(result, 0, resultLength);
545    }
546    Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
547    return dst.extract(result, resultLength, *status);
548}
549
550U_CAPI const char* U_EXPORT2
551ucol_getAvailable(int32_t index)
552{
553    int32_t count = 0;
554    const Locale *loc = Collator::getAvailableLocales(count);
555    if (loc != NULL && index < count) {
556        return loc[index].getName();
557    }
558    return NULL;
559}
560
561U_CAPI int32_t U_EXPORT2
562ucol_countAvailable()
563{
564    int32_t count = 0;
565    Collator::getAvailableLocales(count);
566    return count;
567}
568
569#if !UCONFIG_NO_SERVICE
570U_CAPI UEnumeration* U_EXPORT2
571ucol_openAvailableLocales(UErrorCode *status) {
572    U_NAMESPACE_USE
573
574    // This is a wrapper over Collator::getAvailableLocales()
575    if (U_FAILURE(*status)) {
576        return NULL;
577    }
578    StringEnumeration *s = Collator::getAvailableLocales();
579    if (s == NULL) {
580        *status = U_MEMORY_ALLOCATION_ERROR;
581        return NULL;
582    }
583    return uenum_openStringEnumeration(s, status);
584}
585#endif
586
587// Note: KEYWORDS[0] != RESOURCE_NAME - alan
588
589static const char* RESOURCE_NAME = "collations";
590
591static const char* KEYWORDS[] = { "collation" };
592
593#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
594
595U_CAPI UEnumeration* U_EXPORT2
596ucol_getKeywords(UErrorCode *status) {
597    UEnumeration *result = NULL;
598    if (U_SUCCESS(*status)) {
599        return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
600    }
601    return result;
602}
603
604U_CAPI UEnumeration* U_EXPORT2
605ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
606    if (U_FAILURE(*status)) {
607        return NULL;
608    }
609    // hard-coded to accept exactly one collation keyword
610    // modify if additional collation keyword is added later
611    if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
612    {
613        *status = U_ILLEGAL_ARGUMENT_ERROR;
614        return NULL;
615    }
616    return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
617}
618
619U_CAPI int32_t U_EXPORT2
620ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
621                             const char* keyword, const char* locale,
622                             UBool* isAvailable, UErrorCode* status)
623{
624    // N.B.: Resource name is "collations" but keyword is "collation"
625    return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
626        "collations", keyword, locale,
627        isAvailable, TRUE, status);
628}
629
630/* returns the locale name the collation data comes from */
631U_CAPI const char * U_EXPORT2
632ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
633    return ucol_getLocaleByType(coll, type, status);
634}
635
636U_CAPI const char * U_EXPORT2
637ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
638    const char *result = NULL;
639    if(status == NULL || U_FAILURE(*status)) {
640        return NULL;
641    }
642    UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
643    UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
644
645    switch(type) {
646  case ULOC_ACTUAL_LOCALE:
647      // validLocale is set only if service registration has explicitly set the
648      // requested and valid locales.  if this is the case, the actual locale
649      // is considered to be the valid locale.
650      if (coll->validLocale != NULL) {
651          result = coll->validLocale;
652      } else if(coll->elements != NULL) {
653          result = ures_getLocale(coll->elements, status);
654      }
655      break;
656  case ULOC_VALID_LOCALE:
657      if (coll->validLocale != NULL) {
658          result = coll->validLocale;
659      } else if(coll->rb != NULL) {
660          result = ures_getLocale(coll->rb, status);
661      }
662      break;
663  case ULOC_REQUESTED_LOCALE:
664      result = coll->requestedLocale;
665      break;
666  default:
667      *status = U_ILLEGAL_ARGUMENT_ERROR;
668    }
669    UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
670    UTRACE_EXIT_STATUS(*status);
671    return result;
672}
673
674U_CFUNC void U_EXPORT2
675ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt)
676{
677    if (coll) {
678        if (coll->validLocale) {
679            uprv_free(coll->validLocale);
680        }
681        coll->validLocale = validLocaleToAdopt;
682        if (coll->requestedLocale) { // should always have
683            uprv_free(coll->requestedLocale);
684        }
685        coll->requestedLocale = requestedLocaleToAdopt;
686    }
687}
688
689U_CAPI USet * U_EXPORT2
690ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
691{
692    U_NAMESPACE_USE
693
694    if(status == NULL || U_FAILURE(*status)) {
695        return NULL;
696    }
697    if(coll == NULL || coll->UCA == NULL) {
698        *status = U_ILLEGAL_ARGUMENT_ERROR;
699        return NULL;
700    }
701    UParseError parseError;
702    UColTokenParser src;
703    int32_t rulesLen = 0;
704    const UChar *rules = ucol_getRules(coll, &rulesLen);
705    UBool startOfRules = TRUE;
706    // we internally use the C++ class, for the following reasons:
707    // 1. we need to utilize canonical iterator, which is a C++ only class
708    // 2. canonical iterator returns UnicodeStrings - USet cannot take them
709    // 3. USet is internally really UnicodeSet, C is just a wrapper
710    UnicodeSet *tailored = new UnicodeSet();
711    UnicodeString pattern;
712    UnicodeString empty;
713    CanonicalIterator it(empty, *status);
714
715
716    // The idea is to tokenize the rule set. For each non-reset token,
717    // we add all the canonicaly equivalent FCD sequences
718    ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status);
719    while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
720        startOfRules = FALSE;
721        if(src.parsedToken.strength != UCOL_TOK_RESET) {
722            const UChar *stuff = src.source+(src.parsedToken.charsOffset);
723            it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
724            pattern = it.next();
725            while(!pattern.isBogus()) {
726                if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
727                    tailored->add(pattern);
728                }
729                pattern = it.next();
730            }
731        }
732    }
733    ucol_tok_closeTokenList(&src);
734    return (USet *)tailored;
735}
736
737#endif /* #if !UCONFIG_NO_COLLATION */
738