1/*
2*******************************************************************************
3*
4*   Copyright (C) 1997-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  loclikely.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2010feb25
14*   created by: Markus W. Scherer
15*
16*   Code for likely and minimized locale subtags, separated out from other .cpp files
17*   that then do not depend on resource bundle code and likely-subtags data.
18*/
19
20#include "unicode/utypes.h"
21#include "unicode/putil.h"
22#include "unicode/uloc.h"
23#include "unicode/ures.h"
24#include "cmemory.h"
25#include "cstring.h"
26#include "ulocimp.h"
27#include "ustr_imp.h"
28
29/**
30 * This function looks for the localeID in the likelySubtags resource.
31 *
32 * @param localeID The tag to find.
33 * @param buffer A buffer to hold the matching entry
34 * @param bufferLength The length of the output buffer
35 * @return A pointer to "buffer" if found, or a null pointer if not.
36 */
37static const char*  U_CALLCONV
38findLikelySubtags(const char* localeID,
39                  char* buffer,
40                  int32_t bufferLength,
41                  UErrorCode* err) {
42    const char* result = NULL;
43
44    if (!U_FAILURE(*err)) {
45        int32_t resLen = 0;
46        const UChar* s = NULL;
47        UErrorCode tmpErr = U_ZERO_ERROR;
48        UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
49        if (U_SUCCESS(tmpErr)) {
50            s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
51
52            if (U_FAILURE(tmpErr)) {
53                /*
54                 * If a resource is missing, it's not really an error, it's
55                 * just that we don't have any data for that particular locale ID.
56                 */
57                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
58                    *err = tmpErr;
59                }
60            }
61            else if (resLen >= bufferLength) {
62                /* The buffer should never overflow. */
63                *err = U_INTERNAL_PROGRAM_ERROR;
64            }
65            else {
66                u_UCharsToChars(s, buffer, resLen + 1);
67                result = buffer;
68            }
69
70            ures_close(subtags);
71        } else {
72            *err = tmpErr;
73        }
74    }
75
76    return result;
77}
78
79/**
80 * Append a tag to a buffer, adding the separator if necessary.  The buffer
81 * must be large enough to contain the resulting tag plus any separator
82 * necessary. The tag must not be a zero-length string.
83 *
84 * @param tag The tag to add.
85 * @param tagLength The length of the tag.
86 * @param buffer The output buffer.
87 * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
88 **/
89static void U_CALLCONV
90appendTag(
91    const char* tag,
92    int32_t tagLength,
93    char* buffer,
94    int32_t* bufferLength) {
95
96    if (*bufferLength > 0) {
97        buffer[*bufferLength] = '_';
98        ++(*bufferLength);
99    }
100
101    uprv_memmove(
102        &buffer[*bufferLength],
103        tag,
104        tagLength);
105
106    *bufferLength += tagLength;
107}
108
/**
 * Canonical subtag strings that stand for an unknown region, script and
 * language, respectively.
 **/
static const char* const unknownRegion = "ZZ";
static const char* const unknownScript = "Zzzz";
static const char* const unknownLanguage = "und";
115
116/**
117 * Create a tag string from the supplied parameters.  The lang, script and region
118 * parameters may be NULL pointers. If they are, their corresponding length parameters
119 * must be less than or equal to 0.
120 *
121 * If any of the language, script or region parameters are empty, and the alternateTags
122 * parameter is not NULL, it will be parsed for potential language, script and region tags
123 * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
124 * it contains no language tag, the default tag for the unknown language is used.
125 *
126 * If the length of the new string exceeds the capacity of the output buffer,
127 * the function copies as many bytes to the output buffer as it can, and returns
128 * the error U_BUFFER_OVERFLOW_ERROR.
129 *
130 * If an illegal argument is provided, the function returns the error
131 * U_ILLEGAL_ARGUMENT_ERROR.
132 *
133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
134 * the tag string fits in the output buffer, but the null terminator doesn't.
135 *
136 * @param lang The language tag to use.
137 * @param langLength The length of the language tag.
138 * @param script The script tag to use.
139 * @param scriptLength The length of the script tag.
140 * @param region The region tag to use.
141 * @param regionLength The length of the region tag.
142 * @param trailing Any trailing data to append to the new tag.
143 * @param trailingLength The length of the trailing data.
144 * @param alternateTags A string containing any alternate tags.
145 * @param tag The output buffer.
146 * @param tagCapacity The capacity of the output buffer.
147 * @param err A pointer to a UErrorCode for error reporting.
148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
149 **/
150static int32_t U_CALLCONV
151createTagStringWithAlternates(
152    const char* lang,
153    int32_t langLength,
154    const char* script,
155    int32_t scriptLength,
156    const char* region,
157    int32_t regionLength,
158    const char* trailing,
159    int32_t trailingLength,
160    const char* alternateTags,
161    char* tag,
162    int32_t tagCapacity,
163    UErrorCode* err) {
164
165    if (U_FAILURE(*err)) {
166        goto error;
167    }
168    else if (tag == NULL ||
169             tagCapacity <= 0 ||
170             langLength >= ULOC_LANG_CAPACITY ||
171             scriptLength >= ULOC_SCRIPT_CAPACITY ||
172             regionLength >= ULOC_COUNTRY_CAPACITY) {
173        goto error;
174    }
175    else {
176        /**
177         * ULOC_FULLNAME_CAPACITY will provide enough capacity
178         * that we can build a string that contains the language,
179         * script and region code without worrying about overrunning
180         * the user-supplied buffer.
181         **/
182        char tagBuffer[ULOC_FULLNAME_CAPACITY];
183        int32_t tagLength = 0;
184        int32_t capacityRemaining = tagCapacity;
185        UBool regionAppended = FALSE;
186
187        if (langLength > 0) {
188            appendTag(
189                lang,
190                langLength,
191                tagBuffer,
192                &tagLength);
193        }
194        else if (alternateTags == NULL) {
195            /*
196             * Append the value for an unknown language, if
197             * we found no language.
198             */
199            appendTag(
200                unknownLanguage,
201                (int32_t)uprv_strlen(unknownLanguage),
202                tagBuffer,
203                &tagLength);
204        }
205        else {
206            /*
207             * Parse the alternateTags string for the language.
208             */
209            char alternateLang[ULOC_LANG_CAPACITY];
210            int32_t alternateLangLength = sizeof(alternateLang);
211
212            alternateLangLength =
213                uloc_getLanguage(
214                    alternateTags,
215                    alternateLang,
216                    alternateLangLength,
217                    err);
218            if(U_FAILURE(*err) ||
219                alternateLangLength >= ULOC_LANG_CAPACITY) {
220                goto error;
221            }
222            else if (alternateLangLength == 0) {
223                /*
224                 * Append the value for an unknown language, if
225                 * we found no language.
226                 */
227                appendTag(
228                    unknownLanguage,
229                    (int32_t)uprv_strlen(unknownLanguage),
230                    tagBuffer,
231                    &tagLength);
232            }
233            else {
234                appendTag(
235                    alternateLang,
236                    alternateLangLength,
237                    tagBuffer,
238                    &tagLength);
239            }
240        }
241
242        if (scriptLength > 0) {
243            appendTag(
244                script,
245                scriptLength,
246                tagBuffer,
247                &tagLength);
248        }
249        else if (alternateTags != NULL) {
250            /*
251             * Parse the alternateTags string for the script.
252             */
253            char alternateScript[ULOC_SCRIPT_CAPACITY];
254
255            const int32_t alternateScriptLength =
256                uloc_getScript(
257                    alternateTags,
258                    alternateScript,
259                    sizeof(alternateScript),
260                    err);
261
262            if (U_FAILURE(*err) ||
263                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
264                goto error;
265            }
266            else if (alternateScriptLength > 0) {
267                appendTag(
268                    alternateScript,
269                    alternateScriptLength,
270                    tagBuffer,
271                    &tagLength);
272            }
273        }
274
275        if (regionLength > 0) {
276            appendTag(
277                region,
278                regionLength,
279                tagBuffer,
280                &tagLength);
281
282            regionAppended = TRUE;
283        }
284        else if (alternateTags != NULL) {
285            /*
286             * Parse the alternateTags string for the region.
287             */
288            char alternateRegion[ULOC_COUNTRY_CAPACITY];
289
290            const int32_t alternateRegionLength =
291                uloc_getCountry(
292                    alternateTags,
293                    alternateRegion,
294                    sizeof(alternateRegion),
295                    err);
296            if (U_FAILURE(*err) ||
297                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
298                goto error;
299            }
300            else if (alternateRegionLength > 0) {
301                appendTag(
302                    alternateRegion,
303                    alternateRegionLength,
304                    tagBuffer,
305                    &tagLength);
306
307                regionAppended = TRUE;
308            }
309        }
310
311        {
312            const int32_t toCopy =
313                tagLength >= tagCapacity ? tagCapacity : tagLength;
314
315            /**
316             * Copy the partial tag from our internal buffer to the supplied
317             * target.
318             **/
319            uprv_memcpy(
320                tag,
321                tagBuffer,
322                toCopy);
323
324            capacityRemaining -= toCopy;
325        }
326
327        if (trailingLength > 0) {
328            if (capacityRemaining > 0 && !regionAppended) {
329                tag[tagLength++] = '_';
330                --capacityRemaining;
331            }
332
333            if (capacityRemaining > 0) {
334                /*
335                 * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
336                 * don't know if the user-supplied buffers overlap.
337                 */
338                const int32_t toCopy =
339                    trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
340
341                uprv_memmove(
342                    &tag[tagLength],
343                    trailing,
344                    toCopy);
345            }
346        }
347
348        tagLength += trailingLength;
349
350        return u_terminateChars(
351                    tag,
352                    tagCapacity,
353                    tagLength,
354                    err);
355    }
356
357error:
358
359    /**
360     * An overflow indicates the locale ID passed in
361     * is ill-formed.  If we got here, and there was
362     * no previous error, it's an implicit overflow.
363     **/
364    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
365        U_SUCCESS(*err)) {
366        *err = U_ILLEGAL_ARGUMENT_ERROR;
367    }
368
369    return -1;
370}
371
372/**
373 * Create a tag string from the supplied parameters.  The lang, script and region
374 * parameters may be NULL pointers. If they are, their corresponding length parameters
375 * must be less than or equal to 0.  If the lang parameter is an empty string, the
376 * default value for an unknown language is written to the output buffer.
377 *
378 * If the length of the new string exceeds the capacity of the output buffer,
379 * the function copies as many bytes to the output buffer as it can, and returns
380 * the error U_BUFFER_OVERFLOW_ERROR.
381 *
382 * If an illegal argument is provided, the function returns the error
383 * U_ILLEGAL_ARGUMENT_ERROR.
384 *
385 * @param lang The language tag to use.
386 * @param langLength The length of the language tag.
387 * @param script The script tag to use.
388 * @param scriptLength The length of the script tag.
389 * @param region The region tag to use.
390 * @param regionLength The length of the region tag.
391 * @param trailing Any trailing data to append to the new tag.
392 * @param trailingLength The length of the trailing data.
393 * @param tag The output buffer.
394 * @param tagCapacity The capacity of the output buffer.
395 * @param err A pointer to a UErrorCode for error reporting.
396 * @return The length of the tag string, which may be greater than tagCapacity.
397 **/
398static int32_t U_CALLCONV
399createTagString(
400    const char* lang,
401    int32_t langLength,
402    const char* script,
403    int32_t scriptLength,
404    const char* region,
405    int32_t regionLength,
406    const char* trailing,
407    int32_t trailingLength,
408    char* tag,
409    int32_t tagCapacity,
410    UErrorCode* err)
411{
412    return createTagStringWithAlternates(
413                lang,
414                langLength,
415                script,
416                scriptLength,
417                region,
418                regionLength,
419                trailing,
420                trailingLength,
421                NULL,
422                tag,
423                tagCapacity,
424                err);
425}
426
427/**
428 * Parse the language, script, and region subtags from a tag string, and copy the
429 * results into the corresponding output parameters. The buffers are null-terminated,
430 * unless overflow occurs.
431 *
432 * The langLength, scriptLength, and regionLength parameters are input/output
433 * parameters, and must contain the capacity of their corresponding buffers on
434 * input.  On output, they will contain the actual length of the buffers, not
435 * including the null terminator.
436 *
437 * If the length of any of the output subtags exceeds the capacity of the corresponding
438 * buffer, the function copies as many bytes to the output buffer as it can, and returns
439 * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
440 * occurs.
441 *
442 * If an illegal argument is provided, the function returns the error
443 * U_ILLEGAL_ARGUMENT_ERROR.
444 *
445 * @param localeID The locale ID to parse.
446 * @param lang The language tag buffer.
447 * @param langLength The length of the language tag.
448 * @param script The script tag buffer.
449 * @param scriptLength The length of the script tag.
450 * @param region The region tag buffer.
451 * @param regionLength The length of the region tag.
452 * @param err A pointer to a UErrorCode for error reporting.
453 * @return The number of chars of the localeID parameter consumed.
454 **/
455static int32_t U_CALLCONV
456parseTagString(
457    const char* localeID,
458    char* lang,
459    int32_t* langLength,
460    char* script,
461    int32_t* scriptLength,
462    char* region,
463    int32_t* regionLength,
464    UErrorCode* err)
465{
466    const char* position = localeID;
467    int32_t subtagLength = 0;
468
469    if(U_FAILURE(*err) ||
470       localeID == NULL ||
471       lang == NULL ||
472       langLength == NULL ||
473       script == NULL ||
474       scriptLength == NULL ||
475       region == NULL ||
476       regionLength == NULL) {
477        goto error;
478    }
479
480    subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
481    u_terminateChars(lang, *langLength, subtagLength, err);
482
483    /*
484     * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
485     * to be an error, because it indicates the user-supplied tag is
486     * not well-formed.
487     */
488    if(U_FAILURE(*err)) {
489        goto error;
490    }
491
492    *langLength = subtagLength;
493
494    /*
495     * If no language was present, use the value of unknownLanguage
496     * instead.  Otherwise, move past any separator.
497     */
498    if (*langLength == 0) {
499        uprv_strcpy(
500            lang,
501            unknownLanguage);
502        *langLength = (int32_t)uprv_strlen(lang);
503    }
504    else if (_isIDSeparator(*position)) {
505        ++position;
506    }
507
508    subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
509    u_terminateChars(script, *scriptLength, subtagLength, err);
510
511    if(U_FAILURE(*err)) {
512        goto error;
513    }
514
515    *scriptLength = subtagLength;
516
517    if (*scriptLength > 0) {
518        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
519            /**
520             * If the script part is the "unknown" script, then don't return it.
521             **/
522            *scriptLength = 0;
523        }
524
525        /*
526         * Move past any separator.
527         */
528        if (_isIDSeparator(*position)) {
529            ++position;
530        }
531    }
532
533    subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
534    u_terminateChars(region, *regionLength, subtagLength, err);
535
536    if(U_FAILURE(*err)) {
537        goto error;
538    }
539
540    *regionLength = subtagLength;
541
542    if (*regionLength > 0) {
543        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
544            /**
545             * If the region part is the "unknown" region, then don't return it.
546             **/
547            *regionLength = 0;
548        }
549    }
550
551exit:
552
553    return (int32_t)(position - localeID);
554
555error:
556
557    /**
558     * If we get here, we have no explicit error, it's the result of an
559     * illegal argument.
560     **/
561    if (!U_FAILURE(*err)) {
562        *err = U_ILLEGAL_ARGUMENT_ERROR;
563    }
564
565    goto exit;
566}
567
568static int32_t U_CALLCONV
569createLikelySubtagsString(
570    const char* lang,
571    int32_t langLength,
572    const char* script,
573    int32_t scriptLength,
574    const char* region,
575    int32_t regionLength,
576    const char* variants,
577    int32_t variantsLength,
578    char* tag,
579    int32_t tagCapacity,
580    UErrorCode* err)
581{
582    /**
583     * ULOC_FULLNAME_CAPACITY will provide enough capacity
584     * that we can build a string that contains the language,
585     * script and region code without worrying about overrunning
586     * the user-supplied buffer.
587     **/
588    char tagBuffer[ULOC_FULLNAME_CAPACITY];
589    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
590    int32_t tagBufferLength = 0;
591
592    if(U_FAILURE(*err)) {
593        goto error;
594    }
595
596    /**
597     * Try the language with the script and region first.
598     **/
599    if (scriptLength > 0 && regionLength > 0) {
600
601        const char* likelySubtags = NULL;
602
603        tagBufferLength = createTagString(
604            lang,
605            langLength,
606            script,
607            scriptLength,
608            region,
609            regionLength,
610            NULL,
611            0,
612            tagBuffer,
613            sizeof(tagBuffer),
614            err);
615        if(U_FAILURE(*err)) {
616            goto error;
617        }
618
619        likelySubtags =
620            findLikelySubtags(
621                tagBuffer,
622                likelySubtagsBuffer,
623                sizeof(likelySubtagsBuffer),
624                err);
625        if(U_FAILURE(*err)) {
626            goto error;
627        }
628
629        if (likelySubtags != NULL) {
630            /* Always use the language tag from the
631               maximal string, since it may be more
632               specific than the one provided. */
633            return createTagStringWithAlternates(
634                        NULL,
635                        0,
636                        NULL,
637                        0,
638                        NULL,
639                        0,
640                        variants,
641                        variantsLength,
642                        likelySubtags,
643                        tag,
644                        tagCapacity,
645                        err);
646        }
647    }
648
649    /**
650     * Try the language with just the script.
651     **/
652    if (scriptLength > 0) {
653
654        const char* likelySubtags = NULL;
655
656        tagBufferLength = createTagString(
657            lang,
658            langLength,
659            script,
660            scriptLength,
661            NULL,
662            0,
663            NULL,
664            0,
665            tagBuffer,
666            sizeof(tagBuffer),
667            err);
668        if(U_FAILURE(*err)) {
669            goto error;
670        }
671
672        likelySubtags =
673            findLikelySubtags(
674                tagBuffer,
675                likelySubtagsBuffer,
676                sizeof(likelySubtagsBuffer),
677                err);
678        if(U_FAILURE(*err)) {
679            goto error;
680        }
681
682        if (likelySubtags != NULL) {
683            /* Always use the language tag from the
684               maximal string, since it may be more
685               specific than the one provided. */
686            return createTagStringWithAlternates(
687                        NULL,
688                        0,
689                        NULL,
690                        0,
691                        region,
692                        regionLength,
693                        variants,
694                        variantsLength,
695                        likelySubtags,
696                        tag,
697                        tagCapacity,
698                        err);
699        }
700    }
701
702    /**
703     * Try the language with just the region.
704     **/
705    if (regionLength > 0) {
706
707        const char* likelySubtags = NULL;
708
709        createTagString(
710            lang,
711            langLength,
712            NULL,
713            0,
714            region,
715            regionLength,
716            NULL,
717            0,
718            tagBuffer,
719            sizeof(tagBuffer),
720            err);
721        if(U_FAILURE(*err)) {
722            goto error;
723        }
724
725        likelySubtags =
726            findLikelySubtags(
727                tagBuffer,
728                likelySubtagsBuffer,
729                sizeof(likelySubtagsBuffer),
730                err);
731        if(U_FAILURE(*err)) {
732            goto error;
733        }
734
735        if (likelySubtags != NULL) {
736            /* Always use the language tag from the
737               maximal string, since it may be more
738               specific than the one provided. */
739            return createTagStringWithAlternates(
740                        NULL,
741                        0,
742                        script,
743                        scriptLength,
744                        NULL,
745                        0,
746                        variants,
747                        variantsLength,
748                        likelySubtags,
749                        tag,
750                        tagCapacity,
751                        err);
752        }
753    }
754
755    /**
756     * Finally, try just the language.
757     **/
758    {
759        const char* likelySubtags = NULL;
760
761        createTagString(
762            lang,
763            langLength,
764            NULL,
765            0,
766            NULL,
767            0,
768            NULL,
769            0,
770            tagBuffer,
771            sizeof(tagBuffer),
772            err);
773        if(U_FAILURE(*err)) {
774            goto error;
775        }
776
777        likelySubtags =
778            findLikelySubtags(
779                tagBuffer,
780                likelySubtagsBuffer,
781                sizeof(likelySubtagsBuffer),
782                err);
783        if(U_FAILURE(*err)) {
784            goto error;
785        }
786
787        if (likelySubtags != NULL) {
788            /* Always use the language tag from the
789               maximal string, since it may be more
790               specific than the one provided. */
791            return createTagStringWithAlternates(
792                        NULL,
793                        0,
794                        script,
795                        scriptLength,
796                        region,
797                        regionLength,
798                        variants,
799                        variantsLength,
800                        likelySubtags,
801                        tag,
802                        tagCapacity,
803                        err);
804        }
805    }
806
807    return u_terminateChars(
808                tag,
809                tagCapacity,
810                0,
811                err);
812
813error:
814
815    if (!U_FAILURE(*err)) {
816        *err = U_ILLEGAL_ARGUMENT_ERROR;
817    }
818
819    return -1;
820}
821
/**
 * Scan the trailing part of a locale ID and jump to the enclosing function's
 * "error" label if any run of chars between separators ('-' or '_') is too
 * long.  Scanning stops at the first '@'; nothing after it is checked.
 *
 * The counter is tested before it is incremented, so a run of up to 9 chars
 * is accepted and the 10th char of a run triggers the jump.
 *
 * The macro must be used as a statement, followed by a semicolon, inside a
 * function that defines an "error" label.
 *
 * @param trailing The trailing data (indexable as chars).
 * @param trailingLength The number of chars of trailing data to scan.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
840
841static int32_t
842_uloc_addLikelySubtags(const char*    localeID,
843         char* maximizedLocaleID,
844         int32_t maximizedLocaleIDCapacity,
845         UErrorCode* err)
846{
847    char lang[ULOC_LANG_CAPACITY];
848    int32_t langLength = sizeof(lang);
849    char script[ULOC_SCRIPT_CAPACITY];
850    int32_t scriptLength = sizeof(script);
851    char region[ULOC_COUNTRY_CAPACITY];
852    int32_t regionLength = sizeof(region);
853    const char* trailing = "";
854    int32_t trailingLength = 0;
855    int32_t trailingIndex = 0;
856    int32_t resultLength = 0;
857
858    if(U_FAILURE(*err)) {
859        goto error;
860    }
861    else if (localeID == NULL ||
862             maximizedLocaleID == NULL ||
863             maximizedLocaleIDCapacity <= 0) {
864        goto error;
865    }
866
867    trailingIndex = parseTagString(
868        localeID,
869        lang,
870        &langLength,
871        script,
872        &scriptLength,
873        region,
874        &regionLength,
875        err);
876    if(U_FAILURE(*err)) {
877        /* Overflow indicates an illegal argument error */
878        if (*err == U_BUFFER_OVERFLOW_ERROR) {
879            *err = U_ILLEGAL_ARGUMENT_ERROR;
880        }
881
882        goto error;
883    }
884
885    /* Find the length of the trailing portion. */
886    trailing = &localeID[trailingIndex];
887    trailingLength = (int32_t)uprv_strlen(trailing);
888
889    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
890
891    resultLength =
892        createLikelySubtagsString(
893            lang,
894            langLength,
895            script,
896            scriptLength,
897            region,
898            regionLength,
899            trailing,
900            trailingLength,
901            maximizedLocaleID,
902            maximizedLocaleIDCapacity,
903            err);
904
905    if (resultLength == 0) {
906        const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
907
908        /*
909         * If we get here, we need to return localeID.
910         */
911        uprv_memcpy(
912            maximizedLocaleID,
913            localeID,
914            localIDLength <= maximizedLocaleIDCapacity ?
915                localIDLength : maximizedLocaleIDCapacity);
916
917        resultLength =
918            u_terminateChars(
919                maximizedLocaleID,
920                maximizedLocaleIDCapacity,
921                localIDLength,
922                err);
923    }
924
925    return resultLength;
926
927error:
928
929    if (!U_FAILURE(*err)) {
930        *err = U_ILLEGAL_ARGUMENT_ERROR;
931    }
932
933    return -1;
934}
935
936static int32_t
937_uloc_minimizeSubtags(const char*    localeID,
938         char* minimizedLocaleID,
939         int32_t minimizedLocaleIDCapacity,
940         UErrorCode* err)
941{
942    /**
943     * ULOC_FULLNAME_CAPACITY will provide enough capacity
944     * that we can build a string that contains the language,
945     * script and region code without worrying about overrunning
946     * the user-supplied buffer.
947     **/
948    char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
949    int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
950
951    char lang[ULOC_LANG_CAPACITY];
952    int32_t langLength = sizeof(lang);
953    char script[ULOC_SCRIPT_CAPACITY];
954    int32_t scriptLength = sizeof(script);
955    char region[ULOC_COUNTRY_CAPACITY];
956    int32_t regionLength = sizeof(region);
957    const char* trailing = "";
958    int32_t trailingLength = 0;
959    int32_t trailingIndex = 0;
960
961    if(U_FAILURE(*err)) {
962        goto error;
963    }
964    else if (localeID == NULL ||
965             minimizedLocaleID == NULL ||
966             minimizedLocaleIDCapacity <= 0) {
967        goto error;
968    }
969
970    trailingIndex =
971        parseTagString(
972            localeID,
973            lang,
974            &langLength,
975            script,
976            &scriptLength,
977            region,
978            &regionLength,
979            err);
980    if(U_FAILURE(*err)) {
981
982        /* Overflow indicates an illegal argument error */
983        if (*err == U_BUFFER_OVERFLOW_ERROR) {
984            *err = U_ILLEGAL_ARGUMENT_ERROR;
985        }
986
987        goto error;
988    }
989
990    /* Find the spot where the variants begin, if any. */
991    trailing = &localeID[trailingIndex];
992    trailingLength = (int32_t)uprv_strlen(trailing);
993
994    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
995
996    createTagString(
997        lang,
998        langLength,
999        script,
1000        scriptLength,
1001        region,
1002        regionLength,
1003        NULL,
1004        0,
1005        maximizedTagBuffer,
1006        maximizedTagBufferLength,
1007        err);
1008    if(U_FAILURE(*err)) {
1009        goto error;
1010    }
1011
1012    /**
1013     * First, we need to first get the maximization
1014     * from AddLikelySubtags.
1015     **/
1016    maximizedTagBufferLength =
1017        uloc_addLikelySubtags(
1018            maximizedTagBuffer,
1019            maximizedTagBuffer,
1020            maximizedTagBufferLength,
1021            err);
1022
1023    if(U_FAILURE(*err)) {
1024        goto error;
1025    }
1026
1027    /**
1028     * Start first with just the language.
1029     **/
1030    {
1031        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1032
1033        const int32_t tagBufferLength =
1034            createLikelySubtagsString(
1035                lang,
1036                langLength,
1037                NULL,
1038                0,
1039                NULL,
1040                0,
1041                NULL,
1042                0,
1043                tagBuffer,
1044                sizeof(tagBuffer),
1045                err);
1046
1047        if(U_FAILURE(*err)) {
1048            goto error;
1049        }
1050        else if (uprv_strnicmp(
1051                    maximizedTagBuffer,
1052                    tagBuffer,
1053                    tagBufferLength) == 0) {
1054
1055            return createTagString(
1056                        lang,
1057                        langLength,
1058                        NULL,
1059                        0,
1060                        NULL,
1061                        0,
1062                        trailing,
1063                        trailingLength,
1064                        minimizedLocaleID,
1065                        minimizedLocaleIDCapacity,
1066                        err);
1067        }
1068    }
1069
1070    /**
1071     * Next, try the language and region.
1072     **/
1073    if (regionLength > 0) {
1074
1075        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1076
1077        const int32_t tagBufferLength =
1078            createLikelySubtagsString(
1079                lang,
1080                langLength,
1081                NULL,
1082                0,
1083                region,
1084                regionLength,
1085                NULL,
1086                0,
1087                tagBuffer,
1088                sizeof(tagBuffer),
1089                err);
1090
1091        if(U_FAILURE(*err)) {
1092            goto error;
1093        }
1094        else if (uprv_strnicmp(
1095                    maximizedTagBuffer,
1096                    tagBuffer,
1097                    tagBufferLength) == 0) {
1098
1099            return createTagString(
1100                        lang,
1101                        langLength,
1102                        NULL,
1103                        0,
1104                        region,
1105                        regionLength,
1106                        trailing,
1107                        trailingLength,
1108                        minimizedLocaleID,
1109                        minimizedLocaleIDCapacity,
1110                        err);
1111        }
1112    }
1113
1114    /**
1115     * Finally, try the language and script.  This is our last chance,
1116     * since trying with all three subtags would only yield the
1117     * maximal version that we already have.
1118     **/
1119    if (scriptLength > 0 && regionLength > 0) {
1120        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1121
1122        const int32_t tagBufferLength =
1123            createLikelySubtagsString(
1124                lang,
1125                langLength,
1126                script,
1127                scriptLength,
1128                NULL,
1129                0,
1130                NULL,
1131                0,
1132                tagBuffer,
1133                sizeof(tagBuffer),
1134                err);
1135
1136        if(U_FAILURE(*err)) {
1137            goto error;
1138        }
1139        else if (uprv_strnicmp(
1140                    maximizedTagBuffer,
1141                    tagBuffer,
1142                    tagBufferLength) == 0) {
1143
1144            return createTagString(
1145                        lang,
1146                        langLength,
1147                        script,
1148                        scriptLength,
1149                        NULL,
1150                        0,
1151                        trailing,
1152                        trailingLength,
1153                        minimizedLocaleID,
1154                        minimizedLocaleIDCapacity,
1155                        err);
1156        }
1157    }
1158
1159    {
1160        /**
1161         * If we got here, return the locale ID parameter.
1162         **/
1163        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1164
1165        uprv_memcpy(
1166            minimizedLocaleID,
1167            localeID,
1168            localeIDLength <= minimizedLocaleIDCapacity ?
1169                localeIDLength : minimizedLocaleIDCapacity);
1170
1171        return u_terminateChars(
1172                    minimizedLocaleID,
1173                    minimizedLocaleIDCapacity,
1174                    localeIDLength,
1175                    err);
1176    }
1177
1178error:
1179
1180    if (!U_FAILURE(*err)) {
1181        *err = U_ILLEGAL_ARGUMENT_ERROR;
1182    }
1183
1184    return -1;
1185
1186
1187}
1188
1189static UBool
1190do_canonicalize(const char*    localeID,
1191         char* buffer,
1192         int32_t bufferCapacity,
1193         UErrorCode* err)
1194{
1195    uloc_canonicalize(
1196        localeID,
1197        buffer,
1198        bufferCapacity,
1199        err);
1200
1201    if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1202        *err == U_BUFFER_OVERFLOW_ERROR) {
1203        *err = U_ILLEGAL_ARGUMENT_ERROR;
1204
1205        return FALSE;
1206    }
1207    else if (U_FAILURE(*err)) {
1208
1209        return FALSE;
1210    }
1211    else {
1212        return TRUE;
1213    }
1214}
1215
1216U_DRAFT int32_t U_EXPORT2
1217uloc_addLikelySubtags(const char*    localeID,
1218         char* maximizedLocaleID,
1219         int32_t maximizedLocaleIDCapacity,
1220         UErrorCode* err)
1221{
1222    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1223
1224    if (!do_canonicalize(
1225        localeID,
1226        localeBuffer,
1227        sizeof(localeBuffer),
1228        err)) {
1229        return -1;
1230    }
1231    else {
1232        return _uloc_addLikelySubtags(
1233                    localeBuffer,
1234                    maximizedLocaleID,
1235                    maximizedLocaleIDCapacity,
1236                    err);
1237    }
1238}
1239
1240U_DRAFT int32_t U_EXPORT2
1241uloc_minimizeSubtags(const char*    localeID,
1242         char* minimizedLocaleID,
1243         int32_t minimizedLocaleIDCapacity,
1244         UErrorCode* err)
1245{
1246    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1247
1248    if (!do_canonicalize(
1249        localeID,
1250        localeBuffer,
1251        sizeof(localeBuffer),
1252        err)) {
1253        return -1;
1254    }
1255    else {
1256        return _uloc_minimizeSubtags(
1257                    localeBuffer,
1258                    minimizedLocaleID,
1259                    minimizedLocaleIDCapacity,
1260                    err);
1261    }
1262}
1263