1/*
2*******************************************************************************
3*   Copyright (C) 2004-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  ucol_sit.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11* Modification history
12* Date        Name      Comments
13* 03/12/2004  weiv      Creation
14*/
15
16#include "unicode/ustring.h"
17#include "unicode/udata.h"
18#include "unicode/utf16.h"
19#include "utracimp.h"
20#include "ucol_imp.h"
21#include "cmemory.h"
22#include "cstring.h"
23#include "uresimp.h"
24#include "unicode/coll.h"
25
26#ifdef UCOL_TRACE_SIT
27# include <stdio.h>
28#endif
29
30#if !UCONFIG_NO_COLLATION
31
32#include "unicode/tblcoll.h"
33
/* Items that can appear in a collator short definition string.
 * The values up to and including UCOL_SIT_LOCELEMENT_MAX are also used as
 * row indices into CollatorSpec::locElements.
 */
enum OptionsList {
    /* locale elements */
    UCOL_SIT_LANGUAGE            = 0,
    UCOL_SIT_SCRIPT              = 1,
    UCOL_SIT_REGION              = 2,
    UCOL_SIT_VARIANT             = 3,
    UCOL_SIT_KEYWORD             = 4,
    UCOL_SIT_PROVIDER            = 5,
    /* the last element that's part of LocElements */
    UCOL_SIT_LOCELEMENT_MAX      = UCOL_SIT_PROVIDER,

    /* remaining items */
    UCOL_SIT_BCP47               = 6,
    UCOL_SIT_STRENGTH            = 7,
    UCOL_SIT_CASE_LEVEL          = 8,
    UCOL_SIT_CASE_FIRST          = 9,
    UCOL_SIT_NUMERIC_COLLATION   = 10,
    UCOL_SIT_ALTERNATE_HANDLING  = 11,
    UCOL_SIT_NORMALIZATION_MODE  = 12,
    UCOL_SIT_FRENCH_COLLATION    = 13,
    UCOL_SIT_HIRAGANA_QUATERNARY = 14,
    UCOL_SIT_VARIABLE_TOP        = 15,
    UCOL_SIT_VARIABLE_TOP_VALUE  = 16,

    /* number of items; used to size the parse table and CollatorSpec::entries */
    UCOL_SIT_ITEMS_COUNT         = 17
};
56
/* Option starter characters: the single letter that introduces each item
 * of a short definition string.
 */

/* starters for the locale elements */
static const char languageArg       = 'L';
static const char scriptArg         = 'Z';
static const char regionArg         = 'R';
static const char variantArg        = 'V';
static const char keywordArg        = 'K';
static const char providerArg       = 'P';
static const char RFC3066Arg        = 'X';

/* starters for the collation attributes */
static const char strengthArg       = 'S';
static const char caseLevelArg      = 'E';
static const char caseFirstArg      = 'C';
static const char numericCollArg    = 'D';
static const char alternateHArg     = 'A';
static const char normArg           = 'N';
static const char frenchCollArg     = 'F';
static const char hiraganaQArg      = 'H';

/* starters for the two variable top forms */
static const char variableTopArg    = 'T';
static const char variableTopValArg = 'B';

/* keyword prefixes appended to the locale ID when a collation keyword
 * or a provider element is present
 */
static const char collationKeyword[] = "@collation=";
static const char providerKeyword[]  = "@sp=";
78
79
80static const int32_t locElementCount = UCOL_SIT_LOCELEMENT_MAX+1;
81static const int32_t locElementCapacity = 32;
82static const int32_t loc3066Capacity = 256;
83static const int32_t locProviderCapacity = 10;
84static const int32_t internalBufferSize = 512;
85
86/* structure containing specification of a collator. Initialized
87 * from a short string. Also used to construct a short string from a
88 * collator instance
89 */
90struct CollatorSpec {
91    char locElements[locElementCount][locElementCapacity];
92    char locale[loc3066Capacity];
93    char provider[locProviderCapacity];
94    UColAttributeValue options[UCOL_ATTRIBUTE_COUNT];
95    uint32_t variableTopValue;
96    UChar variableTopString[locElementCapacity];
97    int32_t variableTopStringLen;
98    UBool variableTopSet;
99    struct {
100        const char *start;
101        int32_t len;
102    } entries[UCOL_SIT_ITEMS_COUNT];
103};
104
105
106/* structure for converting between character attribute
107 * representation and real collation attribute value.
108 */
109struct AttributeConversion {
110    char letter;
111    UColAttributeValue value;
112};
113
114static const AttributeConversion conversions[12] = {
115    { '1', UCOL_PRIMARY },
116    { '2', UCOL_SECONDARY },
117    { '3', UCOL_TERTIARY },
118    { '4', UCOL_QUATERNARY },
119    { 'D', UCOL_DEFAULT },
120    { 'I', UCOL_IDENTICAL },
121    { 'L', UCOL_LOWER_FIRST },
122    { 'N', UCOL_NON_IGNORABLE },
123    { 'O', UCOL_ON },
124    { 'S', UCOL_SHIFTED },
125    { 'U', UCOL_UPPER_FIRST },
126    { 'X', UCOL_OFF }
127};
128
129
130static UColAttributeValue
131ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) {
132    uint32_t i = 0;
133    for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) {
134        if(conversions[i].letter == letter) {
135            return conversions[i].value;
136        }
137    }
138    *status = U_ILLEGAL_ARGUMENT_ERROR;
139#ifdef UCOL_TRACE_SIT
140    fprintf(stderr, "%s:%d: unknown letter %c: %s\n", __FILE__, __LINE__, letter, u_errorName(*status));
141#endif
142    return UCOL_DEFAULT;
143}
144
145/* function prototype for functions used to parse a short string */
146U_CDECL_BEGIN
147typedef const char* U_CALLCONV
148ActionFunction(CollatorSpec *spec, uint32_t value1, const char* string,
149               UErrorCode *status);
150U_CDECL_END
151
152U_CDECL_BEGIN
153static const char* U_CALLCONV
154_processLocaleElement(CollatorSpec *spec, uint32_t value, const char* string,
155                      UErrorCode *status)
156{
157    int32_t len = 0;
158    do {
159        if(value == UCOL_SIT_LANGUAGE || value == UCOL_SIT_KEYWORD || value == UCOL_SIT_PROVIDER) {
160            spec->locElements[value][len++] = uprv_tolower(*string);
161        } else {
162            spec->locElements[value][len++] = *string;
163        }
164    } while(*(++string) != '_' && *string && len < locElementCapacity);
165    if(len >= locElementCapacity) {
166        *status = U_BUFFER_OVERFLOW_ERROR;
167        return string;
168    }
169    // don't skip the underscore at the end
170    return string;
171}
172U_CDECL_END
173
174U_CDECL_BEGIN
175static const char* U_CALLCONV
176_processRFC3066Locale(CollatorSpec *spec, uint32_t, const char* string,
177                      UErrorCode *status)
178{
179    char terminator = *string;
180    string++;
181    const char *end = uprv_strchr(string+1, terminator);
182    if(end == NULL || end - string >= loc3066Capacity) {
183        *status = U_BUFFER_OVERFLOW_ERROR;
184        return string;
185    } else {
186        uprv_strncpy(spec->locale, string, end-string);
187        return end+1;
188    }
189}
190
191U_CDECL_END
192
193U_CDECL_BEGIN
194static const char* U_CALLCONV
195_processCollatorOption(CollatorSpec *spec, uint32_t option, const char* string,
196                       UErrorCode *status)
197{
198    spec->options[option] = ucol_sit_letterToAttributeValue(*string, status);
199    if((*(++string) != '_' && *string) || U_FAILURE(*status)) {
200#ifdef UCOL_TRACE_SIT
201    fprintf(stderr, "%s:%d: unknown collator option at '%s': %s\n", __FILE__, __LINE__, string, u_errorName(*status));
202#endif
203        *status = U_ILLEGAL_ARGUMENT_ERROR;
204    }
205    return string;
206}
207U_CDECL_END
208
209
210static UChar
211readHexCodeUnit(const char **string, UErrorCode *status)
212{
213    UChar result = 0;
214    int32_t value = 0;
215    char c;
216    int32_t noDigits = 0;
217    while((c = **string) != 0 && noDigits < 4) {
218        if( c >= '0' && c <= '9') {
219            value = c - '0';
220        } else if ( c >= 'a' && c <= 'f') {
221            value = c - 'a' + 10;
222        } else if ( c >= 'A' && c <= 'F') {
223            value = c - 'A' + 10;
224        } else {
225            *status = U_ILLEGAL_ARGUMENT_ERROR;
226#ifdef UCOL_TRACE_SIT
227            fprintf(stderr, "%s:%d: Bad hex char at '%s': %s\n", __FILE__, __LINE__, *string, u_errorName(*status));
228#endif
229            return 0;
230        }
231        result = (result << 4) | (UChar)value;
232        noDigits++;
233        (*string)++;
234    }
235    // if the string was terminated before we read 4 digits, set an error
236    if(noDigits < 4) {
237        *status = U_ILLEGAL_ARGUMENT_ERROR;
238#ifdef UCOL_TRACE_SIT
239        fprintf(stderr, "%s:%d: Short (only %d digits, wanted 4) at '%s': %s\n", __FILE__, __LINE__, noDigits,*string, u_errorName(*status));
240#endif
241    }
242    return result;
243}
244
245U_CDECL_BEGIN
246static const char* U_CALLCONV
247_processVariableTop(CollatorSpec *spec, uint32_t value1, const char* string, UErrorCode *status)
248{
249    // get four digits
250    int32_t i = 0;
251    if(!value1) {
252        while(U_SUCCESS(*status) && i < locElementCapacity && *string != 0 && *string != '_') {
253            spec->variableTopString[i++] = readHexCodeUnit(&string, status);
254        }
255        spec->variableTopStringLen = i;
256        if(i == locElementCapacity && *string != 0 && *string != '_') {
257            *status = U_BUFFER_OVERFLOW_ERROR;
258        }
259    } else {
260        spec->variableTopValue = readHexCodeUnit(&string, status);
261    }
262    if(U_SUCCESS(*status)) {
263        spec->variableTopSet = TRUE;
264    }
265    return string;
266}
267U_CDECL_END
268
269
270/* Table for parsing short strings */
271struct ShortStringOptions {
272    char optionStart;
273    ActionFunction *action;
274    uint32_t attr;
275};
276
277static const ShortStringOptions options[UCOL_SIT_ITEMS_COUNT] =
278{
279/* 10 ALTERNATE_HANDLING */   {alternateHArg,     _processCollatorOption, UCOL_ALTERNATE_HANDLING }, // alternate  N, S, D
280/* 15 VARIABLE_TOP_VALUE */   {variableTopValArg, _processVariableTop,    1 },
281/* 08 CASE_FIRST */           {caseFirstArg,      _processCollatorOption, UCOL_CASE_FIRST }, // case first L, U, X, D
282/* 09 NUMERIC_COLLATION */    {numericCollArg,    _processCollatorOption, UCOL_NUMERIC_COLLATION }, // codan      O, X, D
283/* 07 CASE_LEVEL */           {caseLevelArg,      _processCollatorOption, UCOL_CASE_LEVEL }, // case level O, X, D
284/* 12 FRENCH_COLLATION */     {frenchCollArg,     _processCollatorOption, UCOL_FRENCH_COLLATION }, // french     O, X, D
285/* 13 HIRAGANA_QUATERNARY] */ {hiraganaQArg,      _processCollatorOption, UCOL_HIRAGANA_QUATERNARY_MODE }, // hiragana   O, X, D
286/* 04 KEYWORD */              {keywordArg,        _processLocaleElement,  UCOL_SIT_KEYWORD }, // keyword
287/* 00 LANGUAGE */             {languageArg,       _processLocaleElement,  UCOL_SIT_LANGUAGE }, // language
288/* 11 NORMALIZATION_MODE */   {normArg,           _processCollatorOption, UCOL_NORMALIZATION_MODE }, // norm       O, X, D
289/* 02 REGION */               {regionArg,         _processLocaleElement,  UCOL_SIT_REGION }, // region
290/* 06 STRENGTH */             {strengthArg,       _processCollatorOption, UCOL_STRENGTH }, // strength   1, 2, 3, 4, I, D
291/* 14 VARIABLE_TOP */         {variableTopArg,    _processVariableTop,    0 },
292/* 03 VARIANT */              {variantArg,        _processLocaleElement,  UCOL_SIT_VARIANT }, // variant
293/* 05 RFC3066BIS */           {RFC3066Arg,        _processRFC3066Locale,  0 }, // rfc3066bis locale name
294/* 01 SCRIPT */               {scriptArg,         _processLocaleElement,  UCOL_SIT_SCRIPT },  // script
295/*    PROVIDER */             {providerArg,       _processLocaleElement, UCOL_SIT_PROVIDER }
296};
297
298
299static
300const char* ucol_sit_readOption(const char *start, CollatorSpec *spec,
301                            UErrorCode *status)
302{
303  int32_t i = 0;
304
305  for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
306      if(*start == options[i].optionStart) {
307          spec->entries[i].start = start;
308          const char* end = options[i].action(spec, options[i].attr, start+1, status);
309          spec->entries[i].len = (int32_t)(end - start);
310          return end;
311      }
312  }
313  *status = U_ILLEGAL_ARGUMENT_ERROR;
314#ifdef UCOL_TRACE_SIT
315  fprintf(stderr, "%s:%d: Unknown option at '%s': %s\n", __FILE__, __LINE__, start, u_errorName(*status));
316#endif
317  return start;
318}
319
320static
321void ucol_sit_initCollatorSpecs(CollatorSpec *spec)
322{
323    // reset everything
324    uprv_memset(spec, 0, sizeof(CollatorSpec));
325    // set collation options to default
326    int32_t i = 0;
327    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
328        spec->options[i] = UCOL_DEFAULT;
329    }
330}
331
332static const char*
333ucol_sit_readSpecs(CollatorSpec *s, const char *string,
334                        UParseError *parseError, UErrorCode *status)
335{
336    const char *definition = string;
337    while(U_SUCCESS(*status) && *string) {
338        string = ucol_sit_readOption(string, s, status);
339        // advance over '_'
340        while(*string && *string == '_') {
341            string++;
342        }
343    }
344    if(U_FAILURE(*status)) {
345        parseError->offset = (int32_t)(string - definition);
346    }
347    return string;
348}
349
350static
351int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, int32_t capacity, UErrorCode *status)
352{
353    int32_t i = 0, j = 0;
354    int32_t len = 0;
355    char optName;
356    if(U_SUCCESS(*status)) {
357        for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
358            if(s->entries[i].start) {
359                if(len) {
360                    if(len < capacity) {
361                        uprv_strcat(destination, "_");
362                    }
363                    len++;
364                }
365                optName = *(s->entries[i].start);
366                if(optName == languageArg || optName == regionArg || optName == variantArg || optName == keywordArg) {
367                    for(j = 0; j < s->entries[i].len; j++) {
368                        if(len + j < capacity) {
369                            destination[len+j] = uprv_toupper(*(s->entries[i].start+j));
370                        }
371                    }
372                    len += s->entries[i].len;
373                } else {
374                    len += s->entries[i].len;
375                    if(len < capacity) {
376                        uprv_strncat(destination,s->entries[i].start, s->entries[i].len);
377                    }
378                }
379            }
380        }
381        return len;
382    } else {
383        return 0;
384    }
385}
386
387static void
388ucol_sit_calculateWholeLocale(CollatorSpec *s) {
389    // put the locale together, unless we have a done
390    // locale
391    if(s->locale[0] == 0) {
392        // first the language
393        uprv_strcat(s->locale, s->locElements[UCOL_SIT_LANGUAGE]);
394        // then the script, if present
395        if(*(s->locElements[UCOL_SIT_SCRIPT])) {
396            uprv_strcat(s->locale, "_");
397            uprv_strcat(s->locale, s->locElements[UCOL_SIT_SCRIPT]);
398        }
399        // then the region, if present
400        if(*(s->locElements[UCOL_SIT_REGION])) {
401            uprv_strcat(s->locale, "_");
402            uprv_strcat(s->locale, s->locElements[UCOL_SIT_REGION]);
403        } else if(*(s->locElements[UCOL_SIT_VARIANT])) { // if there is a variant, we need an underscore
404            uprv_strcat(s->locale, "_");
405        }
406        // add variant, if there
407        if(*(s->locElements[UCOL_SIT_VARIANT])) {
408            uprv_strcat(s->locale, "_");
409            uprv_strcat(s->locale, s->locElements[UCOL_SIT_VARIANT]);
410        }
411
412        // if there is a collation keyword, add that too
413        if(*(s->locElements[UCOL_SIT_KEYWORD])) {
414            uprv_strcat(s->locale, collationKeyword);
415            uprv_strcat(s->locale, s->locElements[UCOL_SIT_KEYWORD]);
416        }
417
418        // if there is a provider keyword, add that too
419        if(*(s->locElements[UCOL_SIT_PROVIDER])) {
420            uprv_strcat(s->locale, providerKeyword);
421            uprv_strcat(s->locale, s->locElements[UCOL_SIT_PROVIDER]);
422        }
423    }
424}
425
426
427U_CAPI void U_EXPORT2
428ucol_prepareShortStringOpen( const char *definition,
429                          UBool,
430                          UParseError *parseError,
431                          UErrorCode *status)
432{
433    if(U_FAILURE(*status)) return;
434
435    UParseError internalParseError;
436
437    if(!parseError) {
438        parseError = &internalParseError;
439    }
440    parseError->line = 0;
441    parseError->offset = 0;
442    parseError->preContext[0] = 0;
443    parseError->postContext[0] = 0;
444
445
446    // first we want to pick stuff out of short string.
447    // we'll end up with an UCA version, locale and a bunch of
448    // settings
449
450    // analyse the string in order to get everything we need.
451    CollatorSpec s;
452    ucol_sit_initCollatorSpecs(&s);
453    ucol_sit_readSpecs(&s, definition, parseError, status);
454    ucol_sit_calculateWholeLocale(&s);
455
456    char buffer[internalBufferSize];
457    uprv_memset(buffer, 0, internalBufferSize);
458    uloc_canonicalize(s.locale, buffer, internalBufferSize, status);
459
460    UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer, status);
461    /* we try to find stuff from keyword */
462    UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
463    UResourceBundle *collElem = NULL;
464    char keyBuffer[256];
465    // if there is a keyword, we pick it up and try to get elements
466    if(!uloc_getKeywordValue(buffer, "collation", keyBuffer, 256, status)) {
467      // no keyword. we try to find the default setting, which will give us the keyword value
468      UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, status);
469      if(U_SUCCESS(*status)) {
470        int32_t defaultKeyLen = 0;
471        const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, status);
472        u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
473        keyBuffer[defaultKeyLen] = 0;
474      } else {
475        *status = U_INTERNAL_PROGRAM_ERROR;
476        return;
477      }
478      ures_close(defaultColl);
479    }
480    collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
481    ures_close(collElem);
482    ures_close(collations);
483    ures_close(b);
484}
485
486
487U_CAPI UCollator* U_EXPORT2
488ucol_openFromShortString( const char *definition,
489                          UBool forceDefaults,
490                          UParseError *parseError,
491                          UErrorCode *status)
492{
493    UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN_FROM_SHORT_STRING);
494    UTRACE_DATA1(UTRACE_INFO, "short string = \"%s\"", definition);
495
496    if(U_FAILURE(*status)) return 0;
497
498    UParseError internalParseError;
499
500    if(!parseError) {
501        parseError = &internalParseError;
502    }
503    parseError->line = 0;
504    parseError->offset = 0;
505    parseError->preContext[0] = 0;
506    parseError->postContext[0] = 0;
507
508
509    // first we want to pick stuff out of short string.
510    // we'll end up with an UCA version, locale and a bunch of
511    // settings
512
513    // analyse the string in order to get everything we need.
514    const char *string = definition;
515    CollatorSpec s;
516    ucol_sit_initCollatorSpecs(&s);
517    string = ucol_sit_readSpecs(&s, definition, parseError, status);
518    ucol_sit_calculateWholeLocale(&s);
519
520    char buffer[internalBufferSize];
521    uprv_memset(buffer, 0, internalBufferSize);
522    uloc_canonicalize(s.locale, buffer, internalBufferSize, status);
523
524    UCollator *result = ucol_open(buffer, status);
525    int32_t i = 0;
526
527    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
528        if(s.options[i] != UCOL_DEFAULT) {
529            if(forceDefaults || ucol_getAttribute(result, (UColAttribute)i, status) != s.options[i]) {
530                ucol_setAttribute(result, (UColAttribute)i, s.options[i], status);
531            }
532
533            if(U_FAILURE(*status)) {
534                parseError->offset = (int32_t)(string - definition);
535                ucol_close(result);
536                return NULL;
537            }
538
539        }
540    }
541    if(s.variableTopSet) {
542        if(s.variableTopString[0]) {
543            ucol_setVariableTop(result, s.variableTopString, s.variableTopStringLen, status);
544        } else { // we set by value, using 'B'
545            ucol_restoreVariableTop(result, s.variableTopValue, status);
546        }
547    }
548
549
550    if(U_FAILURE(*status)) { // here it can only be a bogus value
551        ucol_close(result);
552        result = NULL;
553    }
554
555    UTRACE_EXIT_PTR_STATUS(result, *status);
556    return result;
557}
558
559
560U_CAPI int32_t U_EXPORT2
561ucol_getShortDefinitionString(const UCollator *coll,
562                              const char *locale,
563                              char *dst,
564                              int32_t capacity,
565                              UErrorCode *status)
566{
567    if(U_FAILURE(*status)) return 0;
568    if(coll == NULL) {
569        *status = U_ILLEGAL_ARGUMENT_ERROR;
570        return 0;
571    }
572    return ((icu::Collator*)coll)->internalGetShortDefinitionString(locale,dst,capacity,*status);
573}
574
575U_CAPI int32_t U_EXPORT2
576ucol_normalizeShortDefinitionString(const char *definition,
577                                    char *destination,
578                                    int32_t capacity,
579                                    UParseError *parseError,
580                                    UErrorCode *status)
581{
582
583    if(U_FAILURE(*status)) {
584        return 0;
585    }
586
587    if(destination) {
588        uprv_memset(destination, 0, capacity*sizeof(char));
589    }
590
591    UParseError pe;
592    if(!parseError) {
593        parseError = &pe;
594    }
595
596    // validate
597    CollatorSpec s;
598    ucol_sit_initCollatorSpecs(&s);
599    ucol_sit_readSpecs(&s, definition, parseError, status);
600    return ucol_sit_dumpSpecs(&s, destination, capacity, status);
601}
602
603/**
604 * Get a set containing the contractions defined by the collator. The set includes
605 * both the UCA contractions and the contractions defined by the collator
606 * @param coll collator
607 * @param conts the set to hold the result
608 * @param status to hold the error code
609 * @return the size of the contraction set
610 */
611U_CAPI int32_t U_EXPORT2
612ucol_getContractions( const UCollator *coll,
613                  USet *contractions,
614                  UErrorCode *status)
615{
616  ucol_getContractionsAndExpansions(coll, contractions, NULL, FALSE, status);
617  return uset_getItemCount(contractions);
618}
619
620/**
621 * Get a set containing the expansions defined by the collator. The set includes
622 * both the UCA expansions and the expansions defined by the tailoring
623 * @param coll collator
624 * @param conts the set to hold the result
625 * @param addPrefixes add the prefix contextual elements to contractions
626 * @param status to hold the error code
627 *
628 * @draft ICU 3.4
629 */
630U_CAPI void U_EXPORT2
631ucol_getContractionsAndExpansions( const UCollator *coll,
632                  USet *contractions,
633                  USet *expansions,
634                  UBool addPrefixes,
635                  UErrorCode *status)
636{
637    if(U_FAILURE(*status)) {
638        return;
639    }
640    if(coll == NULL) {
641        *status = U_ILLEGAL_ARGUMENT_ERROR;
642        return;
643    }
644    const icu::RuleBasedCollator *rbc = icu::RuleBasedCollator::rbcFromUCollator(coll);
645    if(rbc == NULL) {
646        *status = U_UNSUPPORTED_ERROR;
647        return;
648    }
649    rbc->internalGetContractionsAndExpansions(
650            icu::UnicodeSet::fromUSet(contractions),
651            icu::UnicodeSet::fromUSet(expansions),
652            addPrefixes, *status);
653}
654#endif
655