1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 1997-2016, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*   file name:  loclikely.cpp
11*   encoding:   UTF-8
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 2010feb25
16*   created by: Markus W. Scherer
17*
18*   Code for likely and minimized locale subtags, separated out from other .cpp files
19*   that then do not depend on resource bundle code and likely-subtags data.
20*/
21
22#include "unicode/utypes.h"
23#include "unicode/locid.h"
24#include "unicode/putil.h"
25#include "unicode/uchar.h"
26#include "unicode/uloc.h"
27#include "unicode/ures.h"
28#include "unicode/uscript.h"
29#include "cmemory.h"
30#include "cstring.h"
31#include "ulocimp.h"
32#include "ustr_imp.h"
33
34/**
35 * This function looks for the localeID in the likelySubtags resource.
36 *
37 * @param localeID The tag to find.
38 * @param buffer A buffer to hold the matching entry
39 * @param bufferLength The length of the output buffer
40 * @return A pointer to "buffer" if found, or a null pointer if not.
41 */
42static const char*  U_CALLCONV
43findLikelySubtags(const char* localeID,
44                  char* buffer,
45                  int32_t bufferLength,
46                  UErrorCode* err) {
47    const char* result = NULL;
48
49    if (!U_FAILURE(*err)) {
50        int32_t resLen = 0;
51        const UChar* s = NULL;
52        UErrorCode tmpErr = U_ZERO_ERROR;
53        UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
54        if (U_SUCCESS(tmpErr)) {
55            s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
56
57            if (U_FAILURE(tmpErr)) {
58                /*
59                 * If a resource is missing, it's not really an error, it's
60                 * just that we don't have any data for that particular locale ID.
61                 */
62                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
63                    *err = tmpErr;
64                }
65            }
66            else if (resLen >= bufferLength) {
67                /* The buffer should never overflow. */
68                *err = U_INTERNAL_PROGRAM_ERROR;
69            }
70            else {
71                u_UCharsToChars(s, buffer, resLen + 1);
72                result = buffer;
73            }
74
75            ures_close(subtags);
76        } else {
77            *err = tmpErr;
78        }
79    }
80
81    return result;
82}
83
84/**
85 * Append a tag to a buffer, adding the separator if necessary.  The buffer
86 * must be large enough to contain the resulting tag plus any separator
87 * necessary. The tag must not be a zero-length string.
88 *
89 * @param tag The tag to add.
90 * @param tagLength The length of the tag.
91 * @param buffer The output buffer.
92 * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
93 **/
94static void U_CALLCONV
95appendTag(
96    const char* tag,
97    int32_t tagLength,
98    char* buffer,
99    int32_t* bufferLength) {
100
101    if (*bufferLength > 0) {
102        buffer[*bufferLength] = '_';
103        ++(*bufferLength);
104    }
105
106    uprv_memmove(
107        &buffer[*bufferLength],
108        tag,
109        tagLength);
110
111    *bufferLength += tagLength;
112}
113
/**
 * Canonical placeholder subtags for an unknown language, script and region.
 **/

/* Substituted whenever a tag carries no language subtag. */
static const char* const unknownLanguage = "und";

/* A parsed script equal to this (ignoring case) is reported as absent. */
static const char* const unknownScript = "Zzzz";

/* A parsed region equal to this (ignoring case) is reported as absent. */
static const char* const unknownRegion = "ZZ";
120
121/**
122 * Create a tag string from the supplied parameters.  The lang, script and region
123 * parameters may be NULL pointers. If they are, their corresponding length parameters
124 * must be less than or equal to 0.
125 *
126 * If any of the language, script or region parameters are empty, and the alternateTags
127 * parameter is not NULL, it will be parsed for potential language, script and region tags
128 * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
129 * it contains no language tag, the default tag for the unknown language is used.
130 *
131 * If the length of the new string exceeds the capacity of the output buffer,
132 * the function copies as many bytes to the output buffer as it can, and returns
133 * the error U_BUFFER_OVERFLOW_ERROR.
134 *
135 * If an illegal argument is provided, the function returns the error
136 * U_ILLEGAL_ARGUMENT_ERROR.
137 *
138 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
139 * the tag string fits in the output buffer, but the null terminator doesn't.
140 *
141 * @param lang The language tag to use.
142 * @param langLength The length of the language tag.
143 * @param script The script tag to use.
144 * @param scriptLength The length of the script tag.
145 * @param region The region tag to use.
146 * @param regionLength The length of the region tag.
147 * @param trailing Any trailing data to append to the new tag.
148 * @param trailingLength The length of the trailing data.
149 * @param alternateTags A string containing any alternate tags.
150 * @param tag The output buffer.
151 * @param tagCapacity The capacity of the output buffer.
152 * @param err A pointer to a UErrorCode for error reporting.
153 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
154 **/
155static int32_t U_CALLCONV
156createTagStringWithAlternates(
157    const char* lang,
158    int32_t langLength,
159    const char* script,
160    int32_t scriptLength,
161    const char* region,
162    int32_t regionLength,
163    const char* trailing,
164    int32_t trailingLength,
165    const char* alternateTags,
166    char* tag,
167    int32_t tagCapacity,
168    UErrorCode* err) {
169
170    if (U_FAILURE(*err)) {
171        goto error;
172    }
173    else if (tag == NULL ||
174             tagCapacity <= 0 ||
175             langLength >= ULOC_LANG_CAPACITY ||
176             scriptLength >= ULOC_SCRIPT_CAPACITY ||
177             regionLength >= ULOC_COUNTRY_CAPACITY) {
178        goto error;
179    }
180    else {
181        /**
182         * ULOC_FULLNAME_CAPACITY will provide enough capacity
183         * that we can build a string that contains the language,
184         * script and region code without worrying about overrunning
185         * the user-supplied buffer.
186         **/
187        char tagBuffer[ULOC_FULLNAME_CAPACITY];
188        int32_t tagLength = 0;
189        int32_t capacityRemaining = tagCapacity;
190        UBool regionAppended = FALSE;
191
192        if (langLength > 0) {
193            appendTag(
194                lang,
195                langLength,
196                tagBuffer,
197                &tagLength);
198        }
199        else if (alternateTags == NULL) {
200            /*
201             * Append the value for an unknown language, if
202             * we found no language.
203             */
204            appendTag(
205                unknownLanguage,
206                (int32_t)uprv_strlen(unknownLanguage),
207                tagBuffer,
208                &tagLength);
209        }
210        else {
211            /*
212             * Parse the alternateTags string for the language.
213             */
214            char alternateLang[ULOC_LANG_CAPACITY];
215            int32_t alternateLangLength = sizeof(alternateLang);
216
217            alternateLangLength =
218                uloc_getLanguage(
219                    alternateTags,
220                    alternateLang,
221                    alternateLangLength,
222                    err);
223            if(U_FAILURE(*err) ||
224                alternateLangLength >= ULOC_LANG_CAPACITY) {
225                goto error;
226            }
227            else if (alternateLangLength == 0) {
228                /*
229                 * Append the value for an unknown language, if
230                 * we found no language.
231                 */
232                appendTag(
233                    unknownLanguage,
234                    (int32_t)uprv_strlen(unknownLanguage),
235                    tagBuffer,
236                    &tagLength);
237            }
238            else {
239                appendTag(
240                    alternateLang,
241                    alternateLangLength,
242                    tagBuffer,
243                    &tagLength);
244            }
245        }
246
247        if (scriptLength > 0) {
248            appendTag(
249                script,
250                scriptLength,
251                tagBuffer,
252                &tagLength);
253        }
254        else if (alternateTags != NULL) {
255            /*
256             * Parse the alternateTags string for the script.
257             */
258            char alternateScript[ULOC_SCRIPT_CAPACITY];
259
260            const int32_t alternateScriptLength =
261                uloc_getScript(
262                    alternateTags,
263                    alternateScript,
264                    sizeof(alternateScript),
265                    err);
266
267            if (U_FAILURE(*err) ||
268                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
269                goto error;
270            }
271            else if (alternateScriptLength > 0) {
272                appendTag(
273                    alternateScript,
274                    alternateScriptLength,
275                    tagBuffer,
276                    &tagLength);
277            }
278        }
279
280        if (regionLength > 0) {
281            appendTag(
282                region,
283                regionLength,
284                tagBuffer,
285                &tagLength);
286
287            regionAppended = TRUE;
288        }
289        else if (alternateTags != NULL) {
290            /*
291             * Parse the alternateTags string for the region.
292             */
293            char alternateRegion[ULOC_COUNTRY_CAPACITY];
294
295            const int32_t alternateRegionLength =
296                uloc_getCountry(
297                    alternateTags,
298                    alternateRegion,
299                    sizeof(alternateRegion),
300                    err);
301            if (U_FAILURE(*err) ||
302                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
303                goto error;
304            }
305            else if (alternateRegionLength > 0) {
306                appendTag(
307                    alternateRegion,
308                    alternateRegionLength,
309                    tagBuffer,
310                    &tagLength);
311
312                regionAppended = TRUE;
313            }
314        }
315
316        {
317            const int32_t toCopy =
318                tagLength >= tagCapacity ? tagCapacity : tagLength;
319
320            /**
321             * Copy the partial tag from our internal buffer to the supplied
322             * target.
323             **/
324            uprv_memcpy(
325                tag,
326                tagBuffer,
327                toCopy);
328
329            capacityRemaining -= toCopy;
330        }
331
332        if (trailingLength > 0) {
333            if (*trailing != '@' && capacityRemaining > 0) {
334                tag[tagLength++] = '_';
335                --capacityRemaining;
336                if (capacityRemaining > 0 && !regionAppended) {
337                    /* extra separator is required */
338                    tag[tagLength++] = '_';
339                    --capacityRemaining;
340                }
341            }
342
343            if (capacityRemaining > 0) {
344                /*
345                 * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
346                 * don't know if the user-supplied buffers overlap.
347                 */
348                const int32_t toCopy =
349                    trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
350
351                uprv_memmove(
352                    &tag[tagLength],
353                    trailing,
354                    toCopy);
355            }
356        }
357
358        tagLength += trailingLength;
359
360        return u_terminateChars(
361                    tag,
362                    tagCapacity,
363                    tagLength,
364                    err);
365    }
366
367error:
368
369    /**
370     * An overflow indicates the locale ID passed in
371     * is ill-formed.  If we got here, and there was
372     * no previous error, it's an implicit overflow.
373     **/
374    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
375        U_SUCCESS(*err)) {
376        *err = U_ILLEGAL_ARGUMENT_ERROR;
377    }
378
379    return -1;
380}
381
382/**
383 * Create a tag string from the supplied parameters.  The lang, script and region
384 * parameters may be NULL pointers. If they are, their corresponding length parameters
385 * must be less than or equal to 0.  If the lang parameter is an empty string, the
386 * default value for an unknown language is written to the output buffer.
387 *
388 * If the length of the new string exceeds the capacity of the output buffer,
389 * the function copies as many bytes to the output buffer as it can, and returns
390 * the error U_BUFFER_OVERFLOW_ERROR.
391 *
392 * If an illegal argument is provided, the function returns the error
393 * U_ILLEGAL_ARGUMENT_ERROR.
394 *
395 * @param lang The language tag to use.
396 * @param langLength The length of the language tag.
397 * @param script The script tag to use.
398 * @param scriptLength The length of the script tag.
399 * @param region The region tag to use.
400 * @param regionLength The length of the region tag.
401 * @param trailing Any trailing data to append to the new tag.
402 * @param trailingLength The length of the trailing data.
403 * @param tag The output buffer.
404 * @param tagCapacity The capacity of the output buffer.
405 * @param err A pointer to a UErrorCode for error reporting.
406 * @return The length of the tag string, which may be greater than tagCapacity.
407 **/
408static int32_t U_CALLCONV
409createTagString(
410    const char* lang,
411    int32_t langLength,
412    const char* script,
413    int32_t scriptLength,
414    const char* region,
415    int32_t regionLength,
416    const char* trailing,
417    int32_t trailingLength,
418    char* tag,
419    int32_t tagCapacity,
420    UErrorCode* err)
421{
422    return createTagStringWithAlternates(
423                lang,
424                langLength,
425                script,
426                scriptLength,
427                region,
428                regionLength,
429                trailing,
430                trailingLength,
431                NULL,
432                tag,
433                tagCapacity,
434                err);
435}
436
437/**
438 * Parse the language, script, and region subtags from a tag string, and copy the
439 * results into the corresponding output parameters. The buffers are null-terminated,
440 * unless overflow occurs.
441 *
442 * The langLength, scriptLength, and regionLength parameters are input/output
443 * parameters, and must contain the capacity of their corresponding buffers on
444 * input.  On output, they will contain the actual length of the buffers, not
445 * including the null terminator.
446 *
447 * If the length of any of the output subtags exceeds the capacity of the corresponding
448 * buffer, the function copies as many bytes to the output buffer as it can, and returns
449 * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
450 * occurs.
451 *
452 * If an illegal argument is provided, the function returns the error
453 * U_ILLEGAL_ARGUMENT_ERROR.
454 *
455 * @param localeID The locale ID to parse.
456 * @param lang The language tag buffer.
457 * @param langLength The length of the language tag.
458 * @param script The script tag buffer.
459 * @param scriptLength The length of the script tag.
460 * @param region The region tag buffer.
461 * @param regionLength The length of the region tag.
462 * @param err A pointer to a UErrorCode for error reporting.
463 * @return The number of chars of the localeID parameter consumed.
464 **/
465static int32_t U_CALLCONV
466parseTagString(
467    const char* localeID,
468    char* lang,
469    int32_t* langLength,
470    char* script,
471    int32_t* scriptLength,
472    char* region,
473    int32_t* regionLength,
474    UErrorCode* err)
475{
476    const char* position = localeID;
477    int32_t subtagLength = 0;
478
479    if(U_FAILURE(*err) ||
480       localeID == NULL ||
481       lang == NULL ||
482       langLength == NULL ||
483       script == NULL ||
484       scriptLength == NULL ||
485       region == NULL ||
486       regionLength == NULL) {
487        goto error;
488    }
489
490    subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
491    u_terminateChars(lang, *langLength, subtagLength, err);
492
493    /*
494     * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
495     * to be an error, because it indicates the user-supplied tag is
496     * not well-formed.
497     */
498    if(U_FAILURE(*err)) {
499        goto error;
500    }
501
502    *langLength = subtagLength;
503
504    /*
505     * If no language was present, use the value of unknownLanguage
506     * instead.  Otherwise, move past any separator.
507     */
508    if (*langLength == 0) {
509        uprv_strcpy(
510            lang,
511            unknownLanguage);
512        *langLength = (int32_t)uprv_strlen(lang);
513    }
514    if (_isIDSeparator(*position)) {
515        ++position;
516    }
517
518    subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
519    u_terminateChars(script, *scriptLength, subtagLength, err);
520
521    if(U_FAILURE(*err)) {
522        goto error;
523    }
524
525    *scriptLength = subtagLength;
526
527    if (*scriptLength > 0) {
528        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
529            /**
530             * If the script part is the "unknown" script, then don't return it.
531             **/
532            *scriptLength = 0;
533        }
534
535        /*
536         * Move past any separator.
537         */
538        if (_isIDSeparator(*position)) {
539            ++position;
540        }
541    }
542
543    subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
544    u_terminateChars(region, *regionLength, subtagLength, err);
545
546    if(U_FAILURE(*err)) {
547        goto error;
548    }
549
550    *regionLength = subtagLength;
551
552    if (*regionLength > 0) {
553        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
554            /**
555             * If the region part is the "unknown" region, then don't return it.
556             **/
557            *regionLength = 0;
558        }
559    } else if (*position != 0 && *position != '@') {
560        /* back up over consumed trailing separator */
561        --position;
562    }
563
564exit:
565
566    return (int32_t)(position - localeID);
567
568error:
569
570    /**
571     * If we get here, we have no explicit error, it's the result of an
572     * illegal argument.
573     **/
574    if (!U_FAILURE(*err)) {
575        *err = U_ILLEGAL_ARGUMENT_ERROR;
576    }
577
578    goto exit;
579}
580
581static int32_t U_CALLCONV
582createLikelySubtagsString(
583    const char* lang,
584    int32_t langLength,
585    const char* script,
586    int32_t scriptLength,
587    const char* region,
588    int32_t regionLength,
589    const char* variants,
590    int32_t variantsLength,
591    char* tag,
592    int32_t tagCapacity,
593    UErrorCode* err)
594{
595    /**
596     * ULOC_FULLNAME_CAPACITY will provide enough capacity
597     * that we can build a string that contains the language,
598     * script and region code without worrying about overrunning
599     * the user-supplied buffer.
600     **/
601    char tagBuffer[ULOC_FULLNAME_CAPACITY];
602    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
603
604    if(U_FAILURE(*err)) {
605        goto error;
606    }
607
608    /**
609     * Try the language with the script and region first.
610     **/
611    if (scriptLength > 0 && regionLength > 0) {
612
613        const char* likelySubtags = NULL;
614
615        createTagString(
616            lang,
617            langLength,
618            script,
619            scriptLength,
620            region,
621            regionLength,
622            NULL,
623            0,
624            tagBuffer,
625            sizeof(tagBuffer),
626            err);
627        if(U_FAILURE(*err)) {
628            goto error;
629        }
630
631        likelySubtags =
632            findLikelySubtags(
633                tagBuffer,
634                likelySubtagsBuffer,
635                sizeof(likelySubtagsBuffer),
636                err);
637        if(U_FAILURE(*err)) {
638            goto error;
639        }
640
641        if (likelySubtags != NULL) {
642            /* Always use the language tag from the
643               maximal string, since it may be more
644               specific than the one provided. */
645            return createTagStringWithAlternates(
646                        NULL,
647                        0,
648                        NULL,
649                        0,
650                        NULL,
651                        0,
652                        variants,
653                        variantsLength,
654                        likelySubtags,
655                        tag,
656                        tagCapacity,
657                        err);
658        }
659    }
660
661    /**
662     * Try the language with just the script.
663     **/
664    if (scriptLength > 0) {
665
666        const char* likelySubtags = NULL;
667
668        createTagString(
669            lang,
670            langLength,
671            script,
672            scriptLength,
673            NULL,
674            0,
675            NULL,
676            0,
677            tagBuffer,
678            sizeof(tagBuffer),
679            err);
680        if(U_FAILURE(*err)) {
681            goto error;
682        }
683
684        likelySubtags =
685            findLikelySubtags(
686                tagBuffer,
687                likelySubtagsBuffer,
688                sizeof(likelySubtagsBuffer),
689                err);
690        if(U_FAILURE(*err)) {
691            goto error;
692        }
693
694        if (likelySubtags != NULL) {
695            /* Always use the language tag from the
696               maximal string, since it may be more
697               specific than the one provided. */
698            return createTagStringWithAlternates(
699                        NULL,
700                        0,
701                        NULL,
702                        0,
703                        region,
704                        regionLength,
705                        variants,
706                        variantsLength,
707                        likelySubtags,
708                        tag,
709                        tagCapacity,
710                        err);
711        }
712    }
713
714    /**
715     * Try the language with just the region.
716     **/
717    if (regionLength > 0) {
718
719        const char* likelySubtags = NULL;
720
721        createTagString(
722            lang,
723            langLength,
724            NULL,
725            0,
726            region,
727            regionLength,
728            NULL,
729            0,
730            tagBuffer,
731            sizeof(tagBuffer),
732            err);
733        if(U_FAILURE(*err)) {
734            goto error;
735        }
736
737        likelySubtags =
738            findLikelySubtags(
739                tagBuffer,
740                likelySubtagsBuffer,
741                sizeof(likelySubtagsBuffer),
742                err);
743        if(U_FAILURE(*err)) {
744            goto error;
745        }
746
747        if (likelySubtags != NULL) {
748            /* Always use the language tag from the
749               maximal string, since it may be more
750               specific than the one provided. */
751            return createTagStringWithAlternates(
752                        NULL,
753                        0,
754                        script,
755                        scriptLength,
756                        NULL,
757                        0,
758                        variants,
759                        variantsLength,
760                        likelySubtags,
761                        tag,
762                        tagCapacity,
763                        err);
764        }
765    }
766
767    /**
768     * Finally, try just the language.
769     **/
770    {
771        const char* likelySubtags = NULL;
772
773        createTagString(
774            lang,
775            langLength,
776            NULL,
777            0,
778            NULL,
779            0,
780            NULL,
781            0,
782            tagBuffer,
783            sizeof(tagBuffer),
784            err);
785        if(U_FAILURE(*err)) {
786            goto error;
787        }
788
789        likelySubtags =
790            findLikelySubtags(
791                tagBuffer,
792                likelySubtagsBuffer,
793                sizeof(likelySubtagsBuffer),
794                err);
795        if(U_FAILURE(*err)) {
796            goto error;
797        }
798
799        if (likelySubtags != NULL) {
800            /* Always use the language tag from the
801               maximal string, since it may be more
802               specific than the one provided. */
803            return createTagStringWithAlternates(
804                        NULL,
805                        0,
806                        script,
807                        scriptLength,
808                        region,
809                        regionLength,
810                        variants,
811                        variantsLength,
812                        likelySubtags,
813                        tag,
814                        tagCapacity,
815                        err);
816        }
817    }
818
819    return u_terminateChars(
820                tag,
821                tagCapacity,
822                0,
823                err);
824
825error:
826
827    if (!U_FAILURE(*err)) {
828        *err = U_ILLEGAL_ARGUMENT_ERROR;
829    }
830
831    return -1;
832}
833
/*
 * Scan the trailing (variant) part of a locale ID and jump to the enclosing
 * function's "error" label if any subtag in it is too long.
 *
 * Subtags are delimited by '-' or '_'; scanning stops at the first '@' or
 * after trailingLength chars.  A subtag is rejected when a char is reached
 * while more than 8 chars of that subtag have already been counted, i.e. runs
 * of up to 9 chars are accepted.
 *
 * NOTE(review): BCP 47 limits variant subtags to 8 characters; the threshold
 * here admits 9.  It is preserved as-is -- confirm whether that is intended.
 *
 * The macro declares locals named "count" and "i", so the arguments must not
 * use those names.  The calling function must define a label named "error".
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
852
853static int32_t
854_uloc_addLikelySubtags(const char*    localeID,
855         char* maximizedLocaleID,
856         int32_t maximizedLocaleIDCapacity,
857         UErrorCode* err)
858{
859    char lang[ULOC_LANG_CAPACITY];
860    int32_t langLength = sizeof(lang);
861    char script[ULOC_SCRIPT_CAPACITY];
862    int32_t scriptLength = sizeof(script);
863    char region[ULOC_COUNTRY_CAPACITY];
864    int32_t regionLength = sizeof(region);
865    const char* trailing = "";
866    int32_t trailingLength = 0;
867    int32_t trailingIndex = 0;
868    int32_t resultLength = 0;
869
870    if(U_FAILURE(*err)) {
871        goto error;
872    }
873    else if (localeID == NULL ||
874             maximizedLocaleID == NULL ||
875             maximizedLocaleIDCapacity <= 0) {
876        goto error;
877    }
878
879    trailingIndex = parseTagString(
880        localeID,
881        lang,
882        &langLength,
883        script,
884        &scriptLength,
885        region,
886        &regionLength,
887        err);
888    if(U_FAILURE(*err)) {
889        /* Overflow indicates an illegal argument error */
890        if (*err == U_BUFFER_OVERFLOW_ERROR) {
891            *err = U_ILLEGAL_ARGUMENT_ERROR;
892        }
893
894        goto error;
895    }
896
897    /* Find the length of the trailing portion. */
898    while (_isIDSeparator(localeID[trailingIndex])) {
899        trailingIndex++;
900    }
901    trailing = &localeID[trailingIndex];
902    trailingLength = (int32_t)uprv_strlen(trailing);
903
904    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
905
906    resultLength =
907        createLikelySubtagsString(
908            lang,
909            langLength,
910            script,
911            scriptLength,
912            region,
913            regionLength,
914            trailing,
915            trailingLength,
916            maximizedLocaleID,
917            maximizedLocaleIDCapacity,
918            err);
919
920    if (resultLength == 0) {
921        const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
922
923        /*
924         * If we get here, we need to return localeID.
925         */
926        uprv_memcpy(
927            maximizedLocaleID,
928            localeID,
929            localIDLength <= maximizedLocaleIDCapacity ?
930                localIDLength : maximizedLocaleIDCapacity);
931
932        resultLength =
933            u_terminateChars(
934                maximizedLocaleID,
935                maximizedLocaleIDCapacity,
936                localIDLength,
937                err);
938    }
939
940    return resultLength;
941
942error:
943
944    if (!U_FAILURE(*err)) {
945        *err = U_ILLEGAL_ARGUMENT_ERROR;
946    }
947
948    return -1;
949}
950
951static int32_t
952_uloc_minimizeSubtags(const char*    localeID,
953         char* minimizedLocaleID,
954         int32_t minimizedLocaleIDCapacity,
955         UErrorCode* err)
956{
957    /**
958     * ULOC_FULLNAME_CAPACITY will provide enough capacity
959     * that we can build a string that contains the language,
960     * script and region code without worrying about overrunning
961     * the user-supplied buffer.
962     **/
963    char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
964    int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
965
966    char lang[ULOC_LANG_CAPACITY];
967    int32_t langLength = sizeof(lang);
968    char script[ULOC_SCRIPT_CAPACITY];
969    int32_t scriptLength = sizeof(script);
970    char region[ULOC_COUNTRY_CAPACITY];
971    int32_t regionLength = sizeof(region);
972    const char* trailing = "";
973    int32_t trailingLength = 0;
974    int32_t trailingIndex = 0;
975
976    if(U_FAILURE(*err)) {
977        goto error;
978    }
979    else if (localeID == NULL ||
980             minimizedLocaleID == NULL ||
981             minimizedLocaleIDCapacity <= 0) {
982        goto error;
983    }
984
985    trailingIndex =
986        parseTagString(
987            localeID,
988            lang,
989            &langLength,
990            script,
991            &scriptLength,
992            region,
993            &regionLength,
994            err);
995    if(U_FAILURE(*err)) {
996
997        /* Overflow indicates an illegal argument error */
998        if (*err == U_BUFFER_OVERFLOW_ERROR) {
999            *err = U_ILLEGAL_ARGUMENT_ERROR;
1000        }
1001
1002        goto error;
1003    }
1004
1005    /* Find the spot where the variants or the keywords begin, if any. */
1006    while (_isIDSeparator(localeID[trailingIndex])) {
1007        trailingIndex++;
1008    }
1009    trailing = &localeID[trailingIndex];
1010    trailingLength = (int32_t)uprv_strlen(trailing);
1011
1012    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1013
1014    createTagString(
1015        lang,
1016        langLength,
1017        script,
1018        scriptLength,
1019        region,
1020        regionLength,
1021        NULL,
1022        0,
1023        maximizedTagBuffer,
1024        maximizedTagBufferLength,
1025        err);
1026    if(U_FAILURE(*err)) {
1027        goto error;
1028    }
1029
1030    /**
1031     * First, we need to first get the maximization
1032     * from AddLikelySubtags.
1033     **/
1034    maximizedTagBufferLength =
1035        uloc_addLikelySubtags(
1036            maximizedTagBuffer,
1037            maximizedTagBuffer,
1038            maximizedTagBufferLength,
1039            err);
1040
1041    if(U_FAILURE(*err)) {
1042        goto error;
1043    }
1044
1045    /**
1046     * Start first with just the language.
1047     **/
1048    {
1049        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1050
1051        const int32_t tagBufferLength =
1052            createLikelySubtagsString(
1053                lang,
1054                langLength,
1055                NULL,
1056                0,
1057                NULL,
1058                0,
1059                NULL,
1060                0,
1061                tagBuffer,
1062                sizeof(tagBuffer),
1063                err);
1064
1065        if(U_FAILURE(*err)) {
1066            goto error;
1067        }
1068        else if (uprv_strnicmp(
1069                    maximizedTagBuffer,
1070                    tagBuffer,
1071                    tagBufferLength) == 0) {
1072
1073            return createTagString(
1074                        lang,
1075                        langLength,
1076                        NULL,
1077                        0,
1078                        NULL,
1079                        0,
1080                        trailing,
1081                        trailingLength,
1082                        minimizedLocaleID,
1083                        minimizedLocaleIDCapacity,
1084                        err);
1085        }
1086    }
1087
1088    /**
1089     * Next, try the language and region.
1090     **/
1091    if (regionLength > 0) {
1092
1093        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1094
1095        const int32_t tagBufferLength =
1096            createLikelySubtagsString(
1097                lang,
1098                langLength,
1099                NULL,
1100                0,
1101                region,
1102                regionLength,
1103                NULL,
1104                0,
1105                tagBuffer,
1106                sizeof(tagBuffer),
1107                err);
1108
1109        if(U_FAILURE(*err)) {
1110            goto error;
1111        }
1112        else if (uprv_strnicmp(
1113                    maximizedTagBuffer,
1114                    tagBuffer,
1115                    tagBufferLength) == 0) {
1116
1117            return createTagString(
1118                        lang,
1119                        langLength,
1120                        NULL,
1121                        0,
1122                        region,
1123                        regionLength,
1124                        trailing,
1125                        trailingLength,
1126                        minimizedLocaleID,
1127                        minimizedLocaleIDCapacity,
1128                        err);
1129        }
1130    }
1131
1132    /**
1133     * Finally, try the language and script.  This is our last chance,
1134     * since trying with all three subtags would only yield the
1135     * maximal version that we already have.
1136     **/
1137    if (scriptLength > 0 && regionLength > 0) {
1138        char tagBuffer[ULOC_FULLNAME_CAPACITY];
1139
1140        const int32_t tagBufferLength =
1141            createLikelySubtagsString(
1142                lang,
1143                langLength,
1144                script,
1145                scriptLength,
1146                NULL,
1147                0,
1148                NULL,
1149                0,
1150                tagBuffer,
1151                sizeof(tagBuffer),
1152                err);
1153
1154        if(U_FAILURE(*err)) {
1155            goto error;
1156        }
1157        else if (uprv_strnicmp(
1158                    maximizedTagBuffer,
1159                    tagBuffer,
1160                    tagBufferLength) == 0) {
1161
1162            return createTagString(
1163                        lang,
1164                        langLength,
1165                        script,
1166                        scriptLength,
1167                        NULL,
1168                        0,
1169                        trailing,
1170                        trailingLength,
1171                        minimizedLocaleID,
1172                        minimizedLocaleIDCapacity,
1173                        err);
1174        }
1175    }
1176
1177    {
1178        /**
1179         * If we got here, return the locale ID parameter.
1180         **/
1181        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1182
1183        uprv_memcpy(
1184            minimizedLocaleID,
1185            localeID,
1186            localeIDLength <= minimizedLocaleIDCapacity ?
1187                localeIDLength : minimizedLocaleIDCapacity);
1188
1189        return u_terminateChars(
1190                    minimizedLocaleID,
1191                    minimizedLocaleIDCapacity,
1192                    localeIDLength,
1193                    err);
1194    }
1195
1196error:
1197
1198    if (!U_FAILURE(*err)) {
1199        *err = U_ILLEGAL_ARGUMENT_ERROR;
1200    }
1201
1202    return -1;
1203
1204
1205}
1206
1207static UBool
1208do_canonicalize(const char*    localeID,
1209         char* buffer,
1210         int32_t bufferCapacity,
1211         UErrorCode* err)
1212{
1213    uloc_canonicalize(
1214        localeID,
1215        buffer,
1216        bufferCapacity,
1217        err);
1218
1219    if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1220        *err == U_BUFFER_OVERFLOW_ERROR) {
1221        *err = U_ILLEGAL_ARGUMENT_ERROR;
1222
1223        return FALSE;
1224    }
1225    else if (U_FAILURE(*err)) {
1226
1227        return FALSE;
1228    }
1229    else {
1230        return TRUE;
1231    }
1232}
1233
1234U_CAPI int32_t U_EXPORT2
1235uloc_addLikelySubtags(const char*    localeID,
1236         char* maximizedLocaleID,
1237         int32_t maximizedLocaleIDCapacity,
1238         UErrorCode* err)
1239{
1240    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1241
1242    if (!do_canonicalize(
1243        localeID,
1244        localeBuffer,
1245        sizeof(localeBuffer),
1246        err)) {
1247        return -1;
1248    }
1249    else {
1250        return _uloc_addLikelySubtags(
1251                    localeBuffer,
1252                    maximizedLocaleID,
1253                    maximizedLocaleIDCapacity,
1254                    err);
1255    }
1256}
1257
1258U_CAPI int32_t U_EXPORT2
1259uloc_minimizeSubtags(const char*    localeID,
1260         char* minimizedLocaleID,
1261         int32_t minimizedLocaleIDCapacity,
1262         UErrorCode* err)
1263{
1264    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1265
1266    if (!do_canonicalize(
1267        localeID,
1268        localeBuffer,
1269        sizeof(localeBuffer),
1270        err)) {
1271        return -1;
1272    }
1273    else {
1274        return _uloc_minimizeSubtags(
1275                    localeBuffer,
1276                    minimizedLocaleID,
1277                    minimizedLocaleIDCapacity,
1278                    err);
1279    }
1280}
1281
// Fast-path table consulted by uloc_isRightToLeft().
// Pairs of (language subtag, + or -) for finding out fast if common languages
// are LTR (minus) or RTL (plus).
// The table is searched with uprv_strstr(); the character that directly
// follows the matched subtag is the direction marker.
static const char LANG_DIR_STRING[] =
        "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1286
1287// Implemented here because this calls uloc_addLikelySubtags().
1288U_CAPI UBool U_EXPORT2
1289uloc_isRightToLeft(const char *locale) {
1290    UErrorCode errorCode = U_ZERO_ERROR;
1291    char script[8];
1292    int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1293    if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1294            scriptLength == 0) {
1295        // Fastpath: We know the likely scripts and their writing direction
1296        // for some common languages.
1297        errorCode = U_ZERO_ERROR;
1298        char lang[8];
1299        int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1300        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1301                langLength == 0) {
1302            return FALSE;
1303        }
1304        const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1305        if (langPtr != NULL) {
1306            switch (langPtr[langLength]) {
1307            case '-': return FALSE;
1308            case '+': return TRUE;
1309            default: break;  // partial match of a longer code
1310            }
1311        }
1312        // Otherwise, find the likely script.
1313        errorCode = U_ZERO_ERROR;
1314        char likely[ULOC_FULLNAME_CAPACITY];
1315        (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
1316        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1317            return FALSE;
1318        }
1319        scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
1320        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1321                scriptLength == 0) {
1322            return FALSE;
1323        }
1324    }
1325    UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1326    return uscript_isRightToLeft(scriptCode);
1327}
1328
1329U_NAMESPACE_BEGIN
1330
1331UBool
1332Locale::isRightToLeft() const {
1333    return uloc_isRightToLeft(getBaseName());
1334}
1335
1336U_NAMESPACE_END
1337
// Size of the scratch buffer used by ulocimp_getRegionForSupplementalData()
// for both the "rg" keyword value and the region subtag.
// The following must at least allow for rg key value (6) plus terminator (1).
#define ULOC_RG_BUFLEN 8
1340
1341U_CAPI int32_t U_EXPORT2
1342ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1343                                     char *region, int32_t regionCapacity, UErrorCode* status) {
1344    if (U_FAILURE(*status)) {
1345        return 0;
1346    }
1347    char rgBuf[ULOC_RG_BUFLEN];
1348    UErrorCode rgStatus = U_ZERO_ERROR;
1349
1350    // First check for rg keyword value
1351    int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1352    if (U_FAILURE(rgStatus) || rgLen != 6) {
1353        rgLen = 0;
1354    } else {
1355        // rgBuf guaranteed to be zero terminated here, with text len 6
1356        char *rgPtr = rgBuf;
1357        for (; *rgPtr!= 0; rgPtr++) {
1358            *rgPtr = uprv_toupper(*rgPtr);
1359        }
1360        rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1361    }
1362
1363    if (rgLen == 0) {
1364        // No valid rg keyword value, try for unicode_region_subtag
1365        rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1366        if (U_FAILURE(*status)) {
1367            rgLen = 0;
1368        } else if (rgLen == 0 && inferRegion) {
1369            // no unicode_region_subtag but inferRegion TRUE, try likely subtags
1370            char locBuf[ULOC_FULLNAME_CAPACITY];
1371            rgStatus = U_ZERO_ERROR;
1372            (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
1373            if (U_SUCCESS(rgStatus)) {
1374                rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
1375                if (U_FAILURE(*status)) {
1376                    rgLen = 0;
1377                }
1378            }
1379        }
1380    }
1381
1382    rgBuf[rgLen] = 0;
1383    uprv_strncpy(region, rgBuf, regionCapacity);
1384    return u_terminateChars(region, regionCapacity, rgLen, status);
1385}
1386
1387