1/*
2*******************************************************************************
3*   Copyright (C) 1996-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  ucol_res.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11* Description:
12* This file contains dependencies that the collation run-time doesn't normally
13* need. This mainly contains resource bundle usage and collation meta information
14*
15* Modification history
16* Date        Name      Comments
17* 1996-1999   various members of ICU team maintained C API for collation framework
18* 02/16/2001  synwee    Added internal method getPrevSpecialCE
19* 03/01/2001  synwee    Added maxexpansion functionality.
20* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
21* 12/08/2004  grhoten   Split part of ucol.cpp into ucol_res.cpp
22* 2012-2014   markus    Rewritten in C++ again.
23*/
24
25#include "unicode/utypes.h"
26
27#if !UCONFIG_NO_COLLATION
28
29#include "unicode/coll.h"
30#include "unicode/localpointer.h"
31#include "unicode/locid.h"
32#include "unicode/tblcoll.h"
33#include "unicode/ucol.h"
34#include "unicode/uloc.h"
35#include "unicode/unistr.h"
36#include "unicode/ures.h"
37#include "cmemory.h"
38#include "cstring.h"
39#include "collationdatareader.h"
40#include "collationroot.h"
41#include "collationtailoring.h"
42#include "putilimp.h"
43#include "uassert.h"
44#include "ucln_in.h"
45#include "ucol_imp.h"
46#include "uenumimp.h"
47#include "ulist.h"
48#include "umutex.h"
49#include "uresimp.h"
50#include "ustrenum.h"
51#include "utracimp.h"
52
53#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
54
55U_NAMESPACE_BEGIN
56
namespace {

// Lazily-initialized, file-local state for the root collation rules.
// Filled in by CollationLoader::loadRootRules() (run once via gInitOnce)
// and reset by ucol_res_cleanup().

// The "UCARules" string fetched from rootBundle; NULL until loaded.
static const UChar *rootRules = NULL;
// Length of rootRules as reported by ures_getStringByKey().
static int32_t rootRulesLength = 0;
// The root collation bundle that rootRules was fetched from;
// it stays open until ucol_res_cleanup() closes it.
static UResourceBundle *rootBundle = NULL;
// Guards the one-time call of loadRootRules() from appendRootRules().
static UInitOnce gInitOnce = U_INITONCE_INITIALIZER;

}  // namespace
65
66U_CDECL_BEGIN
67
68static UBool U_CALLCONV
69ucol_res_cleanup() {
70    rootRules = NULL;
71    rootRulesLength = 0;
72    ures_close(rootBundle);
73    rootBundle = NULL;
74    gInitOnce.reset();
75    return TRUE;
76}
77
78U_CDECL_END
79
80void
81CollationLoader::loadRootRules(UErrorCode &errorCode) {
82    if(U_FAILURE(errorCode)) { return; }
83    rootBundle = ures_open(U_ICUDATA_COLL, kRootLocaleName, &errorCode);
84    if(U_FAILURE(errorCode)) { return; }
85    rootRules = ures_getStringByKey(rootBundle, "UCARules", &rootRulesLength, &errorCode);
86    if(U_FAILURE(errorCode)) {
87        ures_close(rootBundle);
88        rootBundle = NULL;
89        return;
90    }
91    ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
92}
93
94void
95CollationLoader::appendRootRules(UnicodeString &s) {
96    UErrorCode errorCode = U_ZERO_ERROR;
97    umtx_initOnce(gInitOnce, CollationLoader::loadRootRules, errorCode);
98    if(U_SUCCESS(errorCode)) {
99        s.append(rootRules, rootRulesLength);
100    }
101}
102
103UnicodeString *
104CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) {
105    if(U_FAILURE(errorCode)) { return NULL; }
106    U_ASSERT(collationType != NULL && *collationType != 0);
107    // Copy the type for lowercasing.
108    char type[16];
109    int32_t typeLength = uprv_strlen(collationType);
110    if(typeLength >= LENGTHOF(type)) {
111        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
112        return NULL;
113    }
114    uprv_memcpy(type, collationType, typeLength + 1);
115    T_CString_toLowerCase(type);
116
117    LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, localeID, &errorCode));
118    LocalUResourceBundlePointer collations(
119            ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
120    LocalUResourceBundlePointer data(
121            ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
122    int32_t length;
123    const UChar *s =  ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode);
124    if(U_FAILURE(errorCode)) { return NULL; }
125
126    // No string pointer aliasing so that we need not hold onto the resource bundle.
127    UnicodeString *rules = new UnicodeString(s, length);
128    if(rules == NULL) {
129        errorCode = U_MEMORY_ALLOCATION_ERROR;
130        return NULL;
131    }
132    return rules;
133}
134
135const CollationTailoring *
136CollationLoader::loadTailoring(const Locale &locale, Locale &validLocale, UErrorCode &errorCode) {
137    const CollationTailoring *root = CollationRoot::getRoot(errorCode);
138    if(U_FAILURE(errorCode)) { return NULL; }
139    const char *name = locale.getName();
140    if(*name == 0 || uprv_strcmp(name, "root") == 0) {
141        validLocale = Locale::getRoot();
142        return root;
143    }
144
145    LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, name, &errorCode));
146    if(errorCode == U_MISSING_RESOURCE_ERROR) {
147        errorCode = U_USING_DEFAULT_WARNING;
148        validLocale = Locale::getRoot();
149        return root;
150    }
151    const char *vLocale = ures_getLocaleByType(bundle.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode);
152    if(U_FAILURE(errorCode)) { return NULL; }
153    validLocale = Locale(vLocale);
154
155    // There are zero or more tailorings in the collations table.
156    LocalUResourceBundlePointer collations(
157            ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
158    if(errorCode == U_MISSING_RESOURCE_ERROR) {
159        errorCode = U_USING_DEFAULT_WARNING;
160        return root;
161    }
162    if(U_FAILURE(errorCode)) { return NULL; }
163
164    // Fetch the collation type from the locale ID and the default type from the data.
165    char type[16];
166    int32_t typeLength = locale.getKeywordValue("collation", type, LENGTHOF(type) - 1, errorCode);
167    if(U_FAILURE(errorCode)) {
168        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
169        return NULL;
170    }
171    type[typeLength] = 0;  // in case of U_NOT_TERMINATED_WARNING
172    char defaultType[16];
173    {
174        UErrorCode internalErrorCode = U_ZERO_ERROR;
175        LocalUResourceBundlePointer def(
176                ures_getByKeyWithFallback(collations.getAlias(), "default", NULL,
177                                          &internalErrorCode));
178        int32_t length;
179        const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
180        if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) {
181            u_UCharsToChars(s, defaultType, length + 1);
182        } else {
183            uprv_strcpy(defaultType, "standard");
184        }
185    }
186    if(typeLength == 0 || uprv_strcmp(type, "default") == 0) {
187        uprv_strcpy(type, defaultType);
188    } else {
189        T_CString_toLowerCase(type);
190    }
191
192    // Load the collations/type tailoring, with type fallback.
193    UBool typeFallback = FALSE;
194    LocalUResourceBundlePointer data(
195            ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
196    if(errorCode == U_MISSING_RESOURCE_ERROR &&
197            typeLength > 6 && uprv_strncmp(type, "search", 6) == 0) {
198        // fall back from something like "searchjl" to "search"
199        typeFallback = TRUE;
200        type[6] = 0;
201        errorCode = U_ZERO_ERROR;
202        data.adoptInstead(
203            ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
204    }
205    if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, defaultType) != 0) {
206        // fall back to the default type
207        typeFallback = TRUE;
208        uprv_strcpy(type, defaultType);
209        errorCode = U_ZERO_ERROR;
210        data.adoptInstead(
211            ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
212    }
213    if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, "standard") != 0) {
214        // fall back to the "standard" type
215        typeFallback = TRUE;
216        uprv_strcpy(type, "standard");
217        errorCode = U_ZERO_ERROR;
218        data.adoptInstead(
219            ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
220    }
221    if(errorCode == U_MISSING_RESOURCE_ERROR) {
222        errorCode = U_USING_DEFAULT_WARNING;
223        return root;
224    }
225    if(U_FAILURE(errorCode)) { return NULL; }
226
227    LocalPointer<CollationTailoring> t(new CollationTailoring(root->settings));
228    if(t.isNull() || t->isBogus()) {
229        errorCode = U_MEMORY_ALLOCATION_ERROR;
230        return NULL;
231    }
232
233    // Is this the same as the root collator? If so, then use that instead.
234    const char *actualLocale = ures_getLocaleByType(data.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode);
235    if(U_FAILURE(errorCode)) { return NULL; }
236    if((*actualLocale == 0 || uprv_strcmp(actualLocale, "root") == 0) &&
237            uprv_strcmp(type, "standard") == 0) {
238        if(typeFallback) {
239            errorCode = U_USING_DEFAULT_WARNING;
240        }
241        return root;
242    }
243    t->actualLocale = Locale(actualLocale);
244
245    // deserialize
246    LocalUResourceBundlePointer binary(
247            ures_getByKey(data.getAlias(), "%%CollationBin", NULL, &errorCode));
248    // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available
249    // but that created undesirable dependencies.
250    int32_t length;
251    const uint8_t *inBytes = ures_getBinary(binary.getAlias(), &length, &errorCode);
252    if(U_FAILURE(errorCode)) { return NULL; }
253    CollationDataReader::read(root, inBytes, length, *t, errorCode);
254    // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available
255    // but that created undesirable dependencies.
256    if(U_FAILURE(errorCode)) { return NULL; }
257
258    // Try to fetch the optional rules string.
259    {
260        UErrorCode internalErrorCode = U_ZERO_ERROR;
261        int32_t length;
262        const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length,
263                                             &internalErrorCode);
264        if(U_SUCCESS(errorCode)) {
265            t->rules.setTo(TRUE, s, length);
266        }
267    }
268
269    // Set the collation types on the informational locales,
270    // except when they match the default types (for brevity and backwards compatibility).
271    // For the valid locale, suppress the default type.
272    if(uprv_strcmp(type, defaultType) != 0) {
273        validLocale.setKeywordValue("collation", type, errorCode);
274        if(U_FAILURE(errorCode)) { return NULL; }
275    }
276
277    // For the actual locale, suppress the default type *according to the actual locale*.
278    // For example, zh has default=pinyin and contains all of the Chinese tailorings.
279    // zh_Hant has default=stroke but has no other data.
280    // For the valid locale "zh_Hant" we need to suppress stroke.
281    // For the actual locale "zh" we need to suppress pinyin instead.
282    if(uprv_strcmp(actualLocale, vLocale) != 0) {
283        // Opening a bundle for the actual locale should always succeed.
284        LocalUResourceBundlePointer actualBundle(
285                ures_open(U_ICUDATA_COLL, actualLocale, &errorCode));
286        if(U_FAILURE(errorCode)) { return NULL; }
287        UErrorCode internalErrorCode = U_ZERO_ERROR;
288        LocalUResourceBundlePointer def(
289                ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL,
290                                          &internalErrorCode));
291        int32_t length;
292        const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
293        if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) {
294            u_UCharsToChars(s, defaultType, length + 1);
295        } else {
296            uprv_strcpy(defaultType, "standard");
297        }
298    }
299    if(uprv_strcmp(type, defaultType) != 0) {
300        t->actualLocale.setKeywordValue("collation", type, errorCode);
301        if(U_FAILURE(errorCode)) { return NULL; }
302    }
303
304    if(typeFallback) {
305        errorCode = U_USING_DEFAULT_WARNING;
306    }
307    t->bundle = bundle.orphan();
308    return t.orphan();
309}
310
311U_NAMESPACE_END
312
313U_NAMESPACE_USE
314
315U_CAPI UCollator*
316ucol_open(const char *loc,
317          UErrorCode *status)
318{
319    U_NAMESPACE_USE
320
321    UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
322    UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
323    UCollator *result = NULL;
324
325    Collator *coll = Collator::createInstance(loc, *status);
326    if(U_SUCCESS(*status)) {
327        result = coll->toUCollator();
328    }
329    UTRACE_EXIT_PTR_STATUS(result, *status);
330    return result;
331}
332
333
334U_CAPI int32_t U_EXPORT2
335ucol_getDisplayName(    const    char        *objLoc,
336                    const    char        *dispLoc,
337                    UChar             *result,
338                    int32_t         resultLength,
339                    UErrorCode        *status)
340{
341    U_NAMESPACE_USE
342
343    if(U_FAILURE(*status)) return -1;
344    UnicodeString dst;
345    if(!(result==NULL && resultLength==0)) {
346        // NULL destination for pure preflighting: empty dummy string
347        // otherwise, alias the destination buffer
348        dst.setTo(result, 0, resultLength);
349    }
350    Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
351    return dst.extract(result, resultLength, *status);
352}
353
354U_CAPI const char* U_EXPORT2
355ucol_getAvailable(int32_t index)
356{
357    int32_t count = 0;
358    const Locale *loc = Collator::getAvailableLocales(count);
359    if (loc != NULL && index < count) {
360        return loc[index].getName();
361    }
362    return NULL;
363}
364
365U_CAPI int32_t U_EXPORT2
366ucol_countAvailable()
367{
368    int32_t count = 0;
369    Collator::getAvailableLocales(count);
370    return count;
371}
372
373#if !UCONFIG_NO_SERVICE
374U_CAPI UEnumeration* U_EXPORT2
375ucol_openAvailableLocales(UErrorCode *status) {
376    U_NAMESPACE_USE
377
378    // This is a wrapper over Collator::getAvailableLocales()
379    if (U_FAILURE(*status)) {
380        return NULL;
381    }
382    StringEnumeration *s = icu::Collator::getAvailableLocales();
383    if (s == NULL) {
384        *status = U_MEMORY_ALLOCATION_ERROR;
385        return NULL;
386    }
387    return uenum_openFromStringEnumeration(s, status);
388}
389#endif
390
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// The resource table is named "collations" (plural) while the locale
// keyword is "collation" (singular); do not use one for the other.

// Name of the resource table that holds the collation tailorings.
static const char RESOURCE_NAME[] = "collations";

// The locale keywords relevant to collation; returned by ucol_getKeywords().
static const char* const KEYWORDS[] = { "collation" };

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT LENGTHOF(KEYWORDS)
398
399U_CAPI UEnumeration* U_EXPORT2
400ucol_getKeywords(UErrorCode *status) {
401    UEnumeration *result = NULL;
402    if (U_SUCCESS(*status)) {
403        return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
404    }
405    return result;
406}
407
408U_CAPI UEnumeration* U_EXPORT2
409ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
410    if (U_FAILURE(*status)) {
411        return NULL;
412    }
413    // hard-coded to accept exactly one collation keyword
414    // modify if additional collation keyword is added later
415    if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
416    {
417        *status = U_ILLEGAL_ARGUMENT_ERROR;
418        return NULL;
419    }
420    return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
421}
422
// Template UEnumeration for ucol_getKeywordValuesForLocale(): that function
// copies this struct into a freshly allocated UEnumeration and then sets its
// `context` member to the UList of results.
// NOTE(review): the member order is defined by UEnumeration (presumably in
// uenumimp.h, not visible here); the two leading NULLs are presumably the
// context pointers and the rest the close/count/unext/next/reset callbacks
// - confirm against that header.
static const UEnumeration defaultKeywordValues = {
    NULL,
    NULL,
    ulist_close_keyword_values_iterator,
    ulist_count_keyword_values,
    uenum_unextDefault,
    ulist_next_keyword_value,
    ulist_reset_keyword_values_iterator
};
432
433#include <stdio.h>
434
435U_CAPI UEnumeration* U_EXPORT2
436ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale,
437                               UBool /*commonlyUsed*/, UErrorCode* status) {
438    /* Get the locale base name. */
439    char localeBuffer[ULOC_FULLNAME_CAPACITY] = "";
440    uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status);
441
442    /* Create the 2 lists
443     * -values is the temp location for the keyword values
444     * -results hold the actual list used by the UEnumeration object
445     */
446    UList *values = ulist_createEmptyList(status);
447    UList *results = ulist_createEmptyList(status);
448    UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
449    if (U_FAILURE(*status) || en == NULL) {
450        if (en == NULL) {
451            *status = U_MEMORY_ALLOCATION_ERROR;
452        } else {
453            uprv_free(en);
454        }
455        ulist_deleteList(values);
456        ulist_deleteList(results);
457        return NULL;
458    }
459
460    memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
461    en->context = results;
462
463    /* Open the resource bundle for collation with the given locale. */
464    UResourceBundle bundle, collations, collres, defres;
465    ures_initStackObject(&bundle);
466    ures_initStackObject(&collations);
467    ures_initStackObject(&collres);
468    ures_initStackObject(&defres);
469
470    ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
471
472    while (U_SUCCESS(*status)) {
473        ures_getByKey(&bundle, RESOURCE_NAME, &collations, status);
474        ures_resetIterator(&collations);
475        while (U_SUCCESS(*status) && ures_hasNext(&collations)) {
476            ures_getNextResource(&collations, &collres, status);
477            const char *key = ures_getKey(&collres);
478            /* If the key is default, get the string and store it in results list only
479             * if results list is empty.
480             */
481            if (uprv_strcmp(key, "default") == 0) {
482                if (ulist_getListSize(results) == 0) {
483                    char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
484                    int32_t defcollLength = ULOC_KEYWORDS_CAPACITY;
485
486                    ures_getNextResource(&collres, &defres, status);
487#if U_CHARSET_FAMILY==U_ASCII_FAMILY
488			/* optimize - use the utf-8 string */
489                    ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status);
490#else
491                    {
492                       const UChar* defString = ures_getString(&defres, &defcollLength, status);
493                       if(U_SUCCESS(*status)) {
494			   if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) {
495				*status = U_BUFFER_OVERFLOW_ERROR;
496			   } else {
497                           	u_UCharsToChars(defString, defcoll, defcollLength+1);
498			   }
499                       }
500                    }
501#endif
502
503                    ulist_addItemBeginList(results, defcoll, TRUE, status);
504                }
505            } else {
506                ulist_addItemEndList(values, key, FALSE, status);
507            }
508        }
509
510        /* If the locale is "" this is root so exit. */
511        if (uprv_strlen(localeBuffer) == 0) {
512            break;
513        }
514        /* Get the parent locale and open a new resource bundle. */
515        uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status);
516        ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
517    }
518
519    ures_close(&defres);
520    ures_close(&collres);
521    ures_close(&collations);
522    ures_close(&bundle);
523
524    if (U_SUCCESS(*status)) {
525        char *value = NULL;
526        ulist_resetList(values);
527        while ((value = (char *)ulist_getNext(values)) != NULL) {
528            if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) {
529                ulist_addItemEndList(results, value, FALSE, status);
530                if (U_FAILURE(*status)) {
531                    break;
532                }
533            }
534        }
535    }
536
537    ulist_deleteList(values);
538
539    if (U_FAILURE(*status)){
540        uenum_close(en);
541        en = NULL;
542    } else {
543        ulist_resetList(results);
544    }
545
546    return en;
547}
548
549U_CAPI int32_t U_EXPORT2
550ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
551                             const char* keyword, const char* locale,
552                             UBool* isAvailable, UErrorCode* status)
553{
554    // N.B.: Resource name is "collations" but keyword is "collation"
555    return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
556        "collations", keyword, locale,
557        isAvailable, TRUE, status);
558}
559
560#endif /* #if !UCONFIG_NO_COLLATION */
561