1/*
2*******************************************************************************
3*
4*   Copyright (C) 1997-2012, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  loclikely.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2010feb25
14*   created by: Markus W. Scherer
15*
16*   Code for likely and minimized locale subtags, separated out from other .cpp files
17*   that then do not depend on resource bundle code and likely-subtags data.
18*/
19
20#include "unicode/utypes.h"
21#include "unicode/putil.h"
22#include "unicode/uloc.h"
23#include "unicode/ures.h"
24#include "cmemory.h"
25#include "cstring.h"
26#include "ulocimp.h"
27#include "ustr_imp.h"
28
29/**
30 * This function looks for the localeID in the likelySubtags resource.
31 *
32 * @param localeID The tag to find.
33 * @param buffer A buffer to hold the matching entry
34 * @param bufferLength The length of the output buffer
35 * @return A pointer to "buffer" if found, or a null pointer if not.
36 */
37static const char*  U_CALLCONV
38findLikelySubtags(const char* localeID,
39                  char* buffer,
40                  int32_t bufferLength,
41                  UErrorCode* err) {
42    const char* result = NULL;
43
44    if (!U_FAILURE(*err)) {
45        int32_t resLen = 0;
46        const UChar* s = NULL;
47        UErrorCode tmpErr = U_ZERO_ERROR;
48        UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
49        if (U_SUCCESS(tmpErr)) {
50            s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
51
52            if (U_FAILURE(tmpErr)) {
53                /*
54                 * If a resource is missing, it's not really an error, it's
55                 * just that we don't have any data for that particular locale ID.
56                 */
57                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
58                    *err = tmpErr;
59                }
60            }
61            else if (resLen >= bufferLength) {
62                /* The buffer should never overflow. */
63                *err = U_INTERNAL_PROGRAM_ERROR;
64            }
65            else {
66                u_UCharsToChars(s, buffer, resLen + 1);
67                result = buffer;
68            }
69
70            ures_close(subtags);
71        } else {
72            *err = tmpErr;
73        }
74    }
75
76    return result;
77}
78
79/**
80 * Append a tag to a buffer, adding the separator if necessary.  The buffer
81 * must be large enough to contain the resulting tag plus any separator
82 * necessary. The tag must not be a zero-length string.
83 *
84 * @param tag The tag to add.
85 * @param tagLength The length of the tag.
86 * @param buffer The output buffer.
87 * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
88 **/
89static void U_CALLCONV
90appendTag(
91    const char* tag,
92    int32_t tagLength,
93    char* buffer,
94    int32_t* bufferLength) {
95
96    if (*bufferLength > 0) {
97        buffer[*bufferLength] = '_';
98        ++(*bufferLength);
99    }
100
101    uprv_memmove(
102        &buffer[*bufferLength],
103        tag,
104        tagLength);
105
106    *bufferLength += tagLength;
107}
108
/**
 * Canonical subtag strings used when a language, script or region is unknown.
 **/
static const char* const unknownLanguage = "und";   /* unknown language */
static const char* const unknownScript   = "Zzzz";  /* unknown script */
static const char* const unknownRegion   = "ZZ";    /* unknown region */
115
116/**
117 * Create a tag string from the supplied parameters.  The lang, script and region
118 * parameters may be NULL pointers. If they are, their corresponding length parameters
119 * must be less than or equal to 0.
120 *
121 * If any of the language, script or region parameters are empty, and the alternateTags
122 * parameter is not NULL, it will be parsed for potential language, script and region tags
123 * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
124 * it contains no language tag, the default tag for the unknown language is used.
125 *
126 * If the length of the new string exceeds the capacity of the output buffer,
127 * the function copies as many bytes to the output buffer as it can, and returns
128 * the error U_BUFFER_OVERFLOW_ERROR.
129 *
130 * If an illegal argument is provided, the function returns the error
131 * U_ILLEGAL_ARGUMENT_ERROR.
132 *
133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
134 * the tag string fits in the output buffer, but the null terminator doesn't.
135 *
136 * @param lang The language tag to use.
137 * @param langLength The length of the language tag.
138 * @param script The script tag to use.
139 * @param scriptLength The length of the script tag.
140 * @param region The region tag to use.
141 * @param regionLength The length of the region tag.
142 * @param trailing Any trailing data to append to the new tag.
143 * @param trailingLength The length of the trailing data.
144 * @param alternateTags A string containing any alternate tags.
145 * @param tag The output buffer.
146 * @param tagCapacity The capacity of the output buffer.
147 * @param err A pointer to a UErrorCode for error reporting.
148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
149 **/
150static int32_t U_CALLCONV
151createTagStringWithAlternates(
152    const char* lang,
153    int32_t langLength,
154    const char* script,
155    int32_t scriptLength,
156    const char* region,
157    int32_t regionLength,
158    const char* trailing,
159    int32_t trailingLength,
160    const char* alternateTags,
161    char* tag,
162    int32_t tagCapacity,
163    UErrorCode* err) {
164
165    if (U_FAILURE(*err)) {
166        goto error;
167    }
168    else if (tag == NULL ||
169             tagCapacity <= 0 ||
170             langLength >= ULOC_LANG_CAPACITY ||
171             scriptLength >= ULOC_SCRIPT_CAPACITY ||
172             regionLength >= ULOC_COUNTRY_CAPACITY) {
173        goto error;
174    }
175    else {
176        /**
177         * ULOC_FULLNAME_CAPACITY will provide enough capacity
178         * that we can build a string that contains the language,
179         * script and region code without worrying about overrunning
180         * the user-supplied buffer.
181         **/
182        char tagBuffer[ULOC_FULLNAME_CAPACITY];
183        int32_t tagLength = 0;
184        int32_t capacityRemaining = tagCapacity;
185        UBool regionAppended = FALSE;
186
187        if (langLength > 0) {
188            appendTag(
189                lang,
190                langLength,
191                tagBuffer,
192                &tagLength);
193        }
194        else if (alternateTags == NULL) {
195            /*
196             * Append the value for an unknown language, if
197             * we found no language.
198             */
199            appendTag(
200                unknownLanguage,
201                (int32_t)uprv_strlen(unknownLanguage),
202                tagBuffer,
203                &tagLength);
204        }
205        else {
206            /*
207             * Parse the alternateTags string for the language.
208             */
209            char alternateLang[ULOC_LANG_CAPACITY];
210            int32_t alternateLangLength = sizeof(alternateLang);
211
212            alternateLangLength =
213                uloc_getLanguage(
214                    alternateTags,
215                    alternateLang,
216                    alternateLangLength,
217                    err);
218            if(U_FAILURE(*err) ||
219                alternateLangLength >= ULOC_LANG_CAPACITY) {
220                goto error;
221            }
222            else if (alternateLangLength == 0) {
223                /*
224                 * Append the value for an unknown language, if
225                 * we found no language.
226                 */
227                appendTag(
228                    unknownLanguage,
229                    (int32_t)uprv_strlen(unknownLanguage),
230                    tagBuffer,
231                    &tagLength);
232            }
233            else {
234                appendTag(
235                    alternateLang,
236                    alternateLangLength,
237                    tagBuffer,
238                    &tagLength);
239            }
240        }
241
242        if (scriptLength > 0) {
243            appendTag(
244                script,
245                scriptLength,
246                tagBuffer,
247                &tagLength);
248        }
249        else if (alternateTags != NULL) {
250            /*
251             * Parse the alternateTags string for the script.
252             */
253            char alternateScript[ULOC_SCRIPT_CAPACITY];
254
255            const int32_t alternateScriptLength =
256                uloc_getScript(
257                    alternateTags,
258                    alternateScript,
259                    sizeof(alternateScript),
260                    err);
261
262            if (U_FAILURE(*err) ||
263                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
264                goto error;
265            }
266            else if (alternateScriptLength > 0) {
267                appendTag(
268                    alternateScript,
269                    alternateScriptLength,
270                    tagBuffer,
271                    &tagLength);
272            }
273        }
274
275        if (regionLength > 0) {
276            appendTag(
277                region,
278                regionLength,
279                tagBuffer,
280                &tagLength);
281
282            regionAppended = TRUE;
283        }
284        else if (alternateTags != NULL) {
285            /*
286             * Parse the alternateTags string for the region.
287             */
288            char alternateRegion[ULOC_COUNTRY_CAPACITY];
289
290            const int32_t alternateRegionLength =
291                uloc_getCountry(
292                    alternateTags,
293                    alternateRegion,
294                    sizeof(alternateRegion),
295                    err);
296            if (U_FAILURE(*err) ||
297                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
298                goto error;
299            }
300            else if (alternateRegionLength > 0) {
301                appendTag(
302                    alternateRegion,
303                    alternateRegionLength,
304                    tagBuffer,
305                    &tagLength);
306
307                regionAppended = TRUE;
308            }
309        }
310
311        {
312            const int32_t toCopy =
313                tagLength >= tagCapacity ? tagCapacity : tagLength;
314
315            /**
316             * Copy the partial tag from our internal buffer to the supplied
317             * target.
318             **/
319            uprv_memcpy(
320                tag,
321                tagBuffer,
322                toCopy);
323
324            capacityRemaining -= toCopy;
325        }
326
327        if (trailingLength > 0) {
328            if (*trailing != '@' && capacityRemaining > 0) {
329                tag[tagLength++] = '_';
330                --capacityRemaining;
331                if (capacityRemaining > 0 && !regionAppended) {
332                    /* extra separator is required */
333                    tag[tagLength++] = '_';
334                    --capacityRemaining;
335                }
336            }
337
338            if (capacityRemaining > 0) {
339                /*
340                 * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
341                 * don't know if the user-supplied buffers overlap.
342                 */
343                const int32_t toCopy =
344                    trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
345
346                uprv_memmove(
347                    &tag[tagLength],
348                    trailing,
349                    toCopy);
350            }
351        }
352
353        tagLength += trailingLength;
354
355        return u_terminateChars(
356                    tag,
357                    tagCapacity,
358                    tagLength,
359                    err);
360    }
361
362error:
363
364    /**
365     * An overflow indicates the locale ID passed in
366     * is ill-formed.  If we got here, and there was
367     * no previous error, it's an implicit overflow.
368     **/
369    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
370        U_SUCCESS(*err)) {
371        *err = U_ILLEGAL_ARGUMENT_ERROR;
372    }
373
374    return -1;
375}
376
377/**
378 * Create a tag string from the supplied parameters.  The lang, script and region
379 * parameters may be NULL pointers. If they are, their corresponding length parameters
380 * must be less than or equal to 0.  If the lang parameter is an empty string, the
381 * default value for an unknown language is written to the output buffer.
382 *
383 * If the length of the new string exceeds the capacity of the output buffer,
384 * the function copies as many bytes to the output buffer as it can, and returns
385 * the error U_BUFFER_OVERFLOW_ERROR.
386 *
387 * If an illegal argument is provided, the function returns the error
388 * U_ILLEGAL_ARGUMENT_ERROR.
389 *
390 * @param lang The language tag to use.
391 * @param langLength The length of the language tag.
392 * @param script The script tag to use.
393 * @param scriptLength The length of the script tag.
394 * @param region The region tag to use.
395 * @param regionLength The length of the region tag.
396 * @param trailing Any trailing data to append to the new tag.
397 * @param trailingLength The length of the trailing data.
398 * @param tag The output buffer.
399 * @param tagCapacity The capacity of the output buffer.
400 * @param err A pointer to a UErrorCode for error reporting.
401 * @return The length of the tag string, which may be greater than tagCapacity.
402 **/
403static int32_t U_CALLCONV
404createTagString(
405    const char* lang,
406    int32_t langLength,
407    const char* script,
408    int32_t scriptLength,
409    const char* region,
410    int32_t regionLength,
411    const char* trailing,
412    int32_t trailingLength,
413    char* tag,
414    int32_t tagCapacity,
415    UErrorCode* err)
416{
417    return createTagStringWithAlternates(
418                lang,
419                langLength,
420                script,
421                scriptLength,
422                region,
423                regionLength,
424                trailing,
425                trailingLength,
426                NULL,
427                tag,
428                tagCapacity,
429                err);
430}
431
432/**
433 * Parse the language, script, and region subtags from a tag string, and copy the
434 * results into the corresponding output parameters. The buffers are null-terminated,
435 * unless overflow occurs.
436 *
437 * The langLength, scriptLength, and regionLength parameters are input/output
438 * parameters, and must contain the capacity of their corresponding buffers on
439 * input.  On output, they will contain the actual length of the buffers, not
440 * including the null terminator.
441 *
442 * If the length of any of the output subtags exceeds the capacity of the corresponding
443 * buffer, the function copies as many bytes to the output buffer as it can, and returns
444 * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
445 * occurs.
446 *
447 * If an illegal argument is provided, the function returns the error
448 * U_ILLEGAL_ARGUMENT_ERROR.
449 *
450 * @param localeID The locale ID to parse.
451 * @param lang The language tag buffer.
452 * @param langLength The length of the language tag.
453 * @param script The script tag buffer.
454 * @param scriptLength The length of the script tag.
455 * @param region The region tag buffer.
456 * @param regionLength The length of the region tag.
457 * @param err A pointer to a UErrorCode for error reporting.
458 * @return The number of chars of the localeID parameter consumed.
459 **/
460static int32_t U_CALLCONV
461parseTagString(
462    const char* localeID,
463    char* lang,
464    int32_t* langLength,
465    char* script,
466    int32_t* scriptLength,
467    char* region,
468    int32_t* regionLength,
469    UErrorCode* err)
470{
471    const char* position = localeID;
472    int32_t subtagLength = 0;
473
474    if(U_FAILURE(*err) ||
475       localeID == NULL ||
476       lang == NULL ||
477       langLength == NULL ||
478       script == NULL ||
479       scriptLength == NULL ||
480       region == NULL ||
481       regionLength == NULL) {
482        goto error;
483    }
484
485    subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
486    u_terminateChars(lang, *langLength, subtagLength, err);
487
488    /*
489     * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
490     * to be an error, because it indicates the user-supplied tag is
491     * not well-formed.
492     */
493    if(U_FAILURE(*err)) {
494        goto error;
495    }
496
497    *langLength = subtagLength;
498
499    /*
500     * If no language was present, use the value of unknownLanguage
501     * instead.  Otherwise, move past any separator.
502     */
503    if (*langLength == 0) {
504        uprv_strcpy(
505            lang,
506            unknownLanguage);
507        *langLength = (int32_t)uprv_strlen(lang);
508    }
509    else if (_isIDSeparator(*position)) {
510        ++position;
511    }
512
513    subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
514    u_terminateChars(script, *scriptLength, subtagLength, err);
515
516    if(U_FAILURE(*err)) {
517        goto error;
518    }
519
520    *scriptLength = subtagLength;
521
522    if (*scriptLength > 0) {
523        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
524            /**
525             * If the script part is the "unknown" script, then don't return it.
526             **/
527            *scriptLength = 0;
528        }
529
530        /*
531         * Move past any separator.
532         */
533        if (_isIDSeparator(*position)) {
534            ++position;
535        }
536    }
537
538    subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
539    u_terminateChars(region, *regionLength, subtagLength, err);
540
541    if(U_FAILURE(*err)) {
542        goto error;
543    }
544
545    *regionLength = subtagLength;
546
547    if (*regionLength > 0) {
548        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
549            /**
550             * If the region part is the "unknown" region, then don't return it.
551             **/
552            *regionLength = 0;
553        }
554    } else if (*position != 0 && *position != '@') {
555        /* back up over consumed trailing separator */
556        --position;
557    }
558
559exit:
560
561    return (int32_t)(position - localeID);
562
563error:
564
565    /**
566     * If we get here, we have no explicit error, it's the result of an
567     * illegal argument.
568     **/
569    if (!U_FAILURE(*err)) {
570        *err = U_ILLEGAL_ARGUMENT_ERROR;
571    }
572
573    goto exit;
574}
575
576static int32_t U_CALLCONV
577createLikelySubtagsString(
578    const char* lang,
579    int32_t langLength,
580    const char* script,
581    int32_t scriptLength,
582    const char* region,
583    int32_t regionLength,
584    const char* variants,
585    int32_t variantsLength,
586    char* tag,
587    int32_t tagCapacity,
588    UErrorCode* err)
589{
590    /**
591     * ULOC_FULLNAME_CAPACITY will provide enough capacity
592     * that we can build a string that contains the language,
593     * script and region code without worrying about overrunning
594     * the user-supplied buffer.
595     **/
596    char tagBuffer[ULOC_FULLNAME_CAPACITY];
597    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
598
599    if(U_FAILURE(*err)) {
600        goto error;
601    }
602
603    /**
604     * Try the language with the script and region first.
605     **/
606    if (scriptLength > 0 && regionLength > 0) {
607
608        const char* likelySubtags = NULL;
609
610        createTagString(
611            lang,
612            langLength,
613            script,
614            scriptLength,
615            region,
616            regionLength,
617            NULL,
618            0,
619            tagBuffer,
620            sizeof(tagBuffer),
621            err);
622        if(U_FAILURE(*err)) {
623            goto error;
624        }
625
626        likelySubtags =
627            findLikelySubtags(
628                tagBuffer,
629                likelySubtagsBuffer,
630                sizeof(likelySubtagsBuffer),
631                err);
632        if(U_FAILURE(*err)) {
633            goto error;
634        }
635
636        if (likelySubtags != NULL) {
637            /* Always use the language tag from the
638               maximal string, since it may be more
639               specific than the one provided. */
640            return createTagStringWithAlternates(
641                        NULL,
642                        0,
643                        NULL,
644                        0,
645                        NULL,
646                        0,
647                        variants,
648                        variantsLength,
649                        likelySubtags,
650                        tag,
651                        tagCapacity,
652                        err);
653        }
654    }
655
656    /**
657     * Try the language with just the script.
658     **/
659    if (scriptLength > 0) {
660
661        const char* likelySubtags = NULL;
662
663        createTagString(
664            lang,
665            langLength,
666            script,
667            scriptLength,
668            NULL,
669            0,
670            NULL,
671            0,
672            tagBuffer,
673            sizeof(tagBuffer),
674            err);
675        if(U_FAILURE(*err)) {
676            goto error;
677        }
678
679        likelySubtags =
680            findLikelySubtags(
681                tagBuffer,
682                likelySubtagsBuffer,
683                sizeof(likelySubtagsBuffer),
684                err);
685        if(U_FAILURE(*err)) {
686            goto error;
687        }
688
689        if (likelySubtags != NULL) {
690            /* Always use the language tag from the
691               maximal string, since it may be more
692               specific than the one provided. */
693            return createTagStringWithAlternates(
694                        NULL,
695                        0,
696                        NULL,
697                        0,
698                        region,
699                        regionLength,
700                        variants,
701                        variantsLength,
702                        likelySubtags,
703                        tag,
704                        tagCapacity,
705                        err);
706        }
707    }
708
709    /**
710     * Try the language with just the region.
711     **/
712    if (regionLength > 0) {
713
714        const char* likelySubtags = NULL;
715
716        createTagString(
717            lang,
718            langLength,
719            NULL,
720            0,
721            region,
722            regionLength,
723            NULL,
724            0,
725            tagBuffer,
726            sizeof(tagBuffer),
727            err);
728        if(U_FAILURE(*err)) {
729            goto error;
730        }
731
732        likelySubtags =
733            findLikelySubtags(
734                tagBuffer,
735                likelySubtagsBuffer,
736                sizeof(likelySubtagsBuffer),
737                err);
738        if(U_FAILURE(*err)) {
739            goto error;
740        }
741
742        if (likelySubtags != NULL) {
743            /* Always use the language tag from the
744               maximal string, since it may be more
745               specific than the one provided. */
746            return createTagStringWithAlternates(
747                        NULL,
748                        0,
749                        script,
750                        scriptLength,
751                        NULL,
752                        0,
753                        variants,
754                        variantsLength,
755                        likelySubtags,
756                        tag,
757                        tagCapacity,
758                        err);
759        }
760    }
761
762    /**
763     * Finally, try just the language.
764     **/
765    {
766        const char* likelySubtags = NULL;
767
768        createTagString(
769            lang,
770            langLength,
771            NULL,
772            0,
773            NULL,
774            0,
775            NULL,
776            0,
777            tagBuffer,
778            sizeof(tagBuffer),
779            err);
780        if(U_FAILURE(*err)) {
781            goto error;
782        }
783
784        likelySubtags =
785            findLikelySubtags(
786                tagBuffer,
787                likelySubtagsBuffer,
788                sizeof(likelySubtagsBuffer),
789                err);
790        if(U_FAILURE(*err)) {
791            goto error;
792        }
793
794        if (likelySubtags != NULL) {
795            /* Always use the language tag from the
796               maximal string, since it may be more
797               specific than the one provided. */
798            return createTagStringWithAlternates(
799                        NULL,
800                        0,
801                        script,
802                        scriptLength,
803                        region,
804                        regionLength,
805                        variants,
806                        variantsLength,
807                        likelySubtags,
808                        tag,
809                        tagCapacity,
810                        err);
811        }
812    }
813
814    return u_terminateChars(
815                tag,
816                tagCapacity,
817                0,
818                err);
819
820error:
821
822    if (!U_FAILURE(*err)) {
823        *err = U_ILLEGAL_ARGUMENT_ERROR;
824    }
825
826    return -1;
827}
828
/**
 * Check the length of each '-' or '_' separated piece in the trailing part
 * of a locale ID, stopping at the first '@' or after trailingLength chars.
 * If a piece is too long, control jumps to a label named "error", which the
 * enclosing function must provide.
 *
 * The test fires on the char that follows 9 consecutive non-separator chars,
 * so pieces of up to 9 chars are accepted and 10 or more are rejected.
 * NOTE(review): the constant 8 suggests an intended limit of 8 chars; the
 * existing behavior is kept as is - confirm before tightening it.
 *
 * @param trailing The trailing part of the locale ID (variants, keywords).
 * @param trailingLength The number of chars of trailing to examine.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t variantCharCount = 0; \
        int32_t trailingPos; \
        for (trailingPos = 0; trailingPos < (trailingLength); trailingPos++) { \
            if ((trailing)[trailingPos] == '-' || (trailing)[trailingPos] == '_') { \
                variantCharCount = 0; \
            } else if ((trailing)[trailingPos] == '@') { \
                break; \
            } else if (variantCharCount > 8) { \
                goto error; \
            } else { \
                variantCharCount++; \
            } \
        } \
    } while (0)
847
848static int32_t
849_uloc_addLikelySubtags(const char*    localeID,
850         char* maximizedLocaleID,
851         int32_t maximizedLocaleIDCapacity,
852         UErrorCode* err)
853{
854    char lang[ULOC_LANG_CAPACITY];
855    int32_t langLength = sizeof(lang);
856    char script[ULOC_SCRIPT_CAPACITY];
857    int32_t scriptLength = sizeof(script);
858    char region[ULOC_COUNTRY_CAPACITY];
859    int32_t regionLength = sizeof(region);
860    const char* trailing = "";
861    int32_t trailingLength = 0;
862    int32_t trailingIndex = 0;
863    int32_t resultLength = 0;
864
865    if(U_FAILURE(*err)) {
866        goto error;
867    }
868    else if (localeID == NULL ||
869             maximizedLocaleID == NULL ||
870             maximizedLocaleIDCapacity <= 0) {
871        goto error;
872    }
873
874    trailingIndex = parseTagString(
875        localeID,
876        lang,
877        &langLength,
878        script,
879        &scriptLength,
880        region,
881        &regionLength,
882        err);
883    if(U_FAILURE(*err)) {
884        /* Overflow indicates an illegal argument error */
885        if (*err == U_BUFFER_OVERFLOW_ERROR) {
886            *err = U_ILLEGAL_ARGUMENT_ERROR;
887        }
888
889        goto error;
890    }
891
892    /* Find the length of the trailing portion. */
893    while (_isIDSeparator(localeID[trailingIndex])) {
894        trailingIndex++;
895    }
896    trailing = &localeID[trailingIndex];
897    trailingLength = (int32_t)uprv_strlen(trailing);
898
899    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
900
901    resultLength =
902        createLikelySubtagsString(
903            lang,
904            langLength,
905            script,
906            scriptLength,
907            region,
908            regionLength,
909            trailing,
910            trailingLength,
911            maximizedLocaleID,
912            maximizedLocaleIDCapacity,
913            err);
914
915    if (resultLength == 0) {
916        const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
917
918        /*
919         * If we get here, we need to return localeID.
920         */
921        uprv_memcpy(
922            maximizedLocaleID,
923            localeID,
924            localIDLength <= maximizedLocaleIDCapacity ?
925                localIDLength : maximizedLocaleIDCapacity);
926
927        resultLength =
928            u_terminateChars(
929                maximizedLocaleID,
930                maximizedLocaleIDCapacity,
931                localIDLength,
932                err);
933    }
934
935    return resultLength;
936
937error:
938
939    if (!U_FAILURE(*err)) {
940        *err = U_ILLEGAL_ARGUMENT_ERROR;
941    }
942
943    return -1;
944}
945
946static int32_t
947_uloc_minimizeSubtags(const char*    localeID,
948         char* minimizedLocaleID,
949         int32_t minimizedLocaleIDCapacity,
950         UErrorCode* err)
951{
952    /**
953     * ULOC_FULLNAME_CAPACITY will provide enough capacity
954     * that we can build a string that contains the language,
955     * script and region code without worrying about overrunning
956     * the user-supplied buffer.
957     **/
958    char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
959    int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
960
961    char lang[ULOC_LANG_CAPACITY];
962    int32_t langLength = sizeof(lang);
963    char script[ULOC_SCRIPT_CAPACITY];
964    int32_t scriptLength = sizeof(script);
965    char region[ULOC_COUNTRY_CAPACITY];
966    int32_t regionLength = sizeof(region);
967    const char* trailing = "";
968    int32_t trailingLength = 0;
969    int32_t trailingIndex = 0;
970
971    if(U_FAILURE(*err)) {
972        goto error;
973    }
974    else if (localeID == NULL ||
975             minimizedLocaleID == NULL ||
976             minimizedLocaleIDCapacity <= 0) {
977        goto error;
978    }
979
980    trailingIndex =
981        parseTagString(
982            localeID,
983            lang,
984            &langLength,
985            script,
986            &scriptLength,
987            region,
988            &regionLength,
989            err);
990    if(U_FAILURE(*err)) {
991
992        /* Overflow indicates an illegal argument error */
993        if (*err == U_BUFFER_OVERFLOW_ERROR) {
994            *err = U_ILLEGAL_ARGUMENT_ERROR;
995        }
996
997        goto error;
998    }
999
1000    /* Find the spot where the variants or the keywords begin, if any. */
1001    while (_isIDSeparator(localeID[trailingIndex])) {
1002        trailingIndex++;
1003    }
1004    trailing = &localeID[trailingIndex];
1005    trailingLength = (int32_t)uprv_strlen(trailing);
1006
1007    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1008
1009    createTagString(
1010        lang,
1011        langLength,
1012        script,
1013        scriptLength,
1014        region,
1015        regionLength,
1016        NULL,
1017        0,
1018        maximizedTagBuffer,
1019        maximizedTagBufferLength,
1020        err);
1021    if(U_FAILURE(*err)) {
1022        goto error;
1023    }
1024
1025    /**
1026     * First, we need to first get the maximization
1027     * from AddLikelySubtags.
1028     **/
1029    maximizedTagBufferLength =
1030        uloc_addLikelySubtags(
1031            maximizedTagBuffer,
1032            maximizedTagBuffer,
1033            maximizedTagBufferLength,
1034            err);
1035
1036    if(U_FAILURE(*err)) {
1037        goto error;
1038    }
1039
1040    /**
1041     * Start first with just the language.
1042     **/
1043    {
1044        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1045
1046        const int32_t tagBufferLength =
1047            createLikelySubtagsString(
1048                lang,
1049                langLength,
1050                NULL,
1051                0,
1052                NULL,
1053                0,
1054                NULL,
1055                0,
1056                tagBuffer,
1057                sizeof(tagBuffer),
1058                err);
1059
1060        if(U_FAILURE(*err)) {
1061            goto error;
1062        }
1063        else if (uprv_strnicmp(
1064                    maximizedTagBuffer,
1065                    tagBuffer,
1066                    tagBufferLength) == 0) {
1067
1068            return createTagString(
1069                        lang,
1070                        langLength,
1071                        NULL,
1072                        0,
1073                        NULL,
1074                        0,
1075                        trailing,
1076                        trailingLength,
1077                        minimizedLocaleID,
1078                        minimizedLocaleIDCapacity,
1079                        err);
1080        }
1081    }
1082
1083    /**
1084     * Next, try the language and region.
1085     **/
1086    if (regionLength > 0) {
1087
1088        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1089
1090        const int32_t tagBufferLength =
1091            createLikelySubtagsString(
1092                lang,
1093                langLength,
1094                NULL,
1095                0,
1096                region,
1097                regionLength,
1098                NULL,
1099                0,
1100                tagBuffer,
1101                sizeof(tagBuffer),
1102                err);
1103
1104        if(U_FAILURE(*err)) {
1105            goto error;
1106        }
1107        else if (uprv_strnicmp(
1108                    maximizedTagBuffer,
1109                    tagBuffer,
1110                    tagBufferLength) == 0) {
1111
1112            return createTagString(
1113                        lang,
1114                        langLength,
1115                        NULL,
1116                        0,
1117                        region,
1118                        regionLength,
1119                        trailing,
1120                        trailingLength,
1121                        minimizedLocaleID,
1122                        minimizedLocaleIDCapacity,
1123                        err);
1124        }
1125    }
1126
1127    /**
1128     * Finally, try the language and script.  This is our last chance,
1129     * since trying with all three subtags would only yield the
1130     * maximal version that we already have.
1131     **/
1132    if (scriptLength > 0 && regionLength > 0) {
1133        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1134
1135        const int32_t tagBufferLength =
1136            createLikelySubtagsString(
1137                lang,
1138                langLength,
1139                script,
1140                scriptLength,
1141                NULL,
1142                0,
1143                NULL,
1144                0,
1145                tagBuffer,
1146                sizeof(tagBuffer),
1147                err);
1148
1149        if(U_FAILURE(*err)) {
1150            goto error;
1151        }
1152        else if (uprv_strnicmp(
1153                    maximizedTagBuffer,
1154                    tagBuffer,
1155                    tagBufferLength) == 0) {
1156
1157            return createTagString(
1158                        lang,
1159                        langLength,
1160                        script,
1161                        scriptLength,
1162                        NULL,
1163                        0,
1164                        trailing,
1165                        trailingLength,
1166                        minimizedLocaleID,
1167                        minimizedLocaleIDCapacity,
1168                        err);
1169        }
1170    }
1171
1172    {
1173        /**
1174         * If we got here, return the locale ID parameter.
1175         **/
1176        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1177
1178        uprv_memcpy(
1179            minimizedLocaleID,
1180            localeID,
1181            localeIDLength <= minimizedLocaleIDCapacity ?
1182                localeIDLength : minimizedLocaleIDCapacity);
1183
1184        return u_terminateChars(
1185                    minimizedLocaleID,
1186                    minimizedLocaleIDCapacity,
1187                    localeIDLength,
1188                    err);
1189    }
1190
1191error:
1192
1193    if (!U_FAILURE(*err)) {
1194        *err = U_ILLEGAL_ARGUMENT_ERROR;
1195    }
1196
1197    return -1;
1198
1199
1200}
1201
1202static UBool
1203do_canonicalize(const char*    localeID,
1204         char* buffer,
1205         int32_t bufferCapacity,
1206         UErrorCode* err)
1207{
1208    uloc_canonicalize(
1209        localeID,
1210        buffer,
1211        bufferCapacity,
1212        err);
1213
1214    if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1215        *err == U_BUFFER_OVERFLOW_ERROR) {
1216        *err = U_ILLEGAL_ARGUMENT_ERROR;
1217
1218        return FALSE;
1219    }
1220    else if (U_FAILURE(*err)) {
1221
1222        return FALSE;
1223    }
1224    else {
1225        return TRUE;
1226    }
1227}
1228
1229U_CAPI int32_t U_EXPORT2
1230uloc_addLikelySubtags(const char*    localeID,
1231         char* maximizedLocaleID,
1232         int32_t maximizedLocaleIDCapacity,
1233         UErrorCode* err)
1234{
1235    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1236
1237    if (!do_canonicalize(
1238        localeID,
1239        localeBuffer,
1240        sizeof(localeBuffer),
1241        err)) {
1242        return -1;
1243    }
1244    else {
1245        return _uloc_addLikelySubtags(
1246                    localeBuffer,
1247                    maximizedLocaleID,
1248                    maximizedLocaleIDCapacity,
1249                    err);
1250    }
1251}
1252
1253U_CAPI int32_t U_EXPORT2
1254uloc_minimizeSubtags(const char*    localeID,
1255         char* minimizedLocaleID,
1256         int32_t minimizedLocaleIDCapacity,
1257         UErrorCode* err)
1258{
1259    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1260
1261    if (!do_canonicalize(
1262        localeID,
1263        localeBuffer,
1264        sizeof(localeBuffer),
1265        err)) {
1266        return -1;
1267    }
1268    else {
1269        return _uloc_minimizeSubtags(
1270                    localeBuffer,
1271                    minimizedLocaleID,
1272                    minimizedLocaleIDCapacity,
1273                    err);
1274    }
1275}
1276