1/*
2**********************************************************************
3*   Copyright (C) 2009-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*/
7
8#include "unicode/utypes.h"
9#include "unicode/ures.h"
10#include "unicode/putil.h"
11#include "unicode/uloc.h"
12#include "ustr_imp.h"
13#include "cmemory.h"
14#include "cstring.h"
15#include "putilimp.h"
16#include "uinvchar.h"
17#include "ulocimp.h"
18#include "uassert.h"
19
/*
 * Node of a singly linked list of variant subtags.
 * The node only references the subtag text; it does not copy it.
 */
typedef struct VariantListEntry {
    const char *variant;            /* text of this variant subtag */
    struct VariantListEntry *next;  /* following node, NULL at the tail */
} VariantListEntry;
25
/*
 * Node of a singly linked list of attribute values.
 */
typedef struct AttributeListEntry {
    const char *attribute;            /* text of this attribute value */
    struct AttributeListEntry *next;  /* following node, NULL at the tail */
} AttributeListEntry;
31
/*
 * Node of a singly linked list of extensions, each a key/value pair.
 */
typedef struct ExtensionListEntry {
    const char *key;                  /* extension key */
    const char *value;                /* value associated with key */
    struct ExtensionListEntry *next;  /* following node, NULL at the tail */
} ExtensionListEntry;
38
39#define MAXEXTLANG 3
40typedef struct ULanguageTag {
41    char                *buf;   /* holding parsed subtags */
42    const char          *language;
43    const char          *extlang[MAXEXTLANG];
44    const char          *script;
45    const char          *region;
46    VariantListEntry    *variants;
47    ExtensionListEntry  *extensions;
48    const char          *privateuse;
49    const char          *grandfathered;
50} ULanguageTag;
51
#define MINLEN 2

/* Characters used on the language tag side. */
#define SEP        '-'   /* subtag separator */
#define PRIVATEUSE 'x'   /* private use singleton */
#define LDMLEXT    'u'   /* singleton compared against when ordering extension keys */

/* Separator characters on the locale ID side. */
#define LOCALE_SEP          '_'
#define LOCALE_EXT_SEP      '@'
#define LOCALE_KEYWORD_SEP  ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classification; both macros may evaluate c more than once. */
#define ISALPHA(c)   uprv_isASCIILetter(c)
#define ISNUMERIC(c) (('0' <= (c)) && ((c) <= '9'))
64
/* Default value for ULanguageTag string fields (see _initializeULanguageTag). */
static const char EMPTY[] = "";

/* Language subtag written when no usable language code is available. */
static const char LANG_UND[] = "und";
#define LANG_UND_LEN 3

static const char PRIVATEUSE_KEY[] = "x";

/* POSIX handling: the "posix" variant is remembered and emitted separately. */
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";

/* Keyword name that carries Unicode locale attributes. */
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";

static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";
76
/*
 * Grandfathered tags and their preferred replacements.
 * Entries come in pairs: even index = grandfathered tag, odd index =
 * preferred value. The table ends with a NULL, NULL pair.
 */
static const char* const GRANDFATHERED[] = {
    "art-lojban", "jbo",
    "cel-gaulish", "xtg-x-cel-gaulish",
    "en-GB-oed", "en-GB-x-oed",

    /* i-* tags */
    "i-ami", "ami",
    "i-bnn", "bnn",
    "i-default", "en-x-i-default",
    "i-enochian", "und-x-i-enochian",
    "i-hak", "hak",
    "i-klingon", "tlh",
    "i-lux", "lb",
    "i-mingo", "see-x-i-mingo",
    "i-navajo", "nv",
    "i-pwn", "pwn",
    "i-tao", "tao",
    "i-tay", "tay",
    "i-tsu", "tsu",

    /* no-* tags */
    "no-bok", "nb",
    "no-nyn", "nn",

    /* sgn-* tags */
    "sgn-be-fr", "sfb",
    "sgn-be-nl", "vgt",
    "sgn-ch-de", "sgg",

    /* zh-* tags */
    "zh-guoyu", "cmn",
    "zh-hakka", "hak",
    "zh-min", "nan-x-zh-min",
    "zh-min-nan", "nan",
    "zh-xiang", "hsn",

    /* terminator */
    NULL, NULL
};
107
/*
 * Deprecated language codes and their replacements.
 * Entries come in pairs: even index = deprecated code, odd index = new code.
 */
static const char DEPRECATEDLANGS[][4] = {
    "iw", "he",
    "ji", "yi",
    "in", "id"
};
114
115/*
116* -------------------------------------------------
117*
118* These ultag_ functions may be exposed as APIs later
119*
120* -------------------------------------------------
121*/
122
123static ULanguageTag*
124ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
125
126static void
127ultag_close(ULanguageTag* langtag);
128
129static const char*
130ultag_getLanguage(const ULanguageTag* langtag);
131
132#if 0
133static const char*
134ultag_getJDKLanguage(const ULanguageTag* langtag);
135#endif
136
137static const char*
138ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
139
140static int32_t
141ultag_getExtlangSize(const ULanguageTag* langtag);
142
143static const char*
144ultag_getScript(const ULanguageTag* langtag);
145
146static const char*
147ultag_getRegion(const ULanguageTag* langtag);
148
149static const char*
150ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
151
152static int32_t
153ultag_getVariantsSize(const ULanguageTag* langtag);
154
155static const char*
156ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
157
158static const char*
159ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
160
161static int32_t
162ultag_getExtensionsSize(const ULanguageTag* langtag);
163
164static const char*
165ultag_getPrivateUse(const ULanguageTag* langtag);
166
167#if 0
168static const char*
169ultag_getGrandfathered(const ULanguageTag* langtag);
170#endif
171
172/*
173* -------------------------------------------------
174*
175* Language subtag syntax validation functions
176*
177* -------------------------------------------------
178*/
179
180static UBool
181_isAlphaString(const char* s, int32_t len) {
182    int32_t i;
183    for (i = 0; i < len; i++) {
184        if (!ISALPHA(*(s + i))) {
185            return FALSE;
186        }
187    }
188    return TRUE;
189}
190
191static UBool
192_isNumericString(const char* s, int32_t len) {
193    int32_t i;
194    for (i = 0; i < len; i++) {
195        if (!ISNUMERIC(*(s + i))) {
196            return FALSE;
197        }
198    }
199    return TRUE;
200}
201
202static UBool
203_isAlphaNumericString(const char* s, int32_t len) {
204    int32_t i;
205    for (i = 0; i < len; i++) {
206        if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
207            return FALSE;
208        }
209    }
210    return TRUE;
211}
212
213static UBool
214_isLanguageSubtag(const char* s, int32_t len) {
215    /*
216     * language      = 2*3ALPHA            ; shortest ISO 639 code
217     *                 ["-" extlang]       ; sometimes followed by
218     *                                     ;   extended language subtags
219     *               / 4ALPHA              ; or reserved for future use
220     *               / 5*8ALPHA            ; or registered language subtag
221     */
222    if (len < 0) {
223        len = (int32_t)uprv_strlen(s);
224    }
225    if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
226        return TRUE;
227    }
228    return FALSE;
229}
230
231static UBool
232_isExtlangSubtag(const char* s, int32_t len) {
233    /*
234     * extlang       = 3ALPHA              ; selected ISO 639 codes
235     *                 *2("-" 3ALPHA)      ; permanently reserved
236     */
237    if (len < 0) {
238        len = (int32_t)uprv_strlen(s);
239    }
240    if (len == 3 && _isAlphaString(s, len)) {
241        return TRUE;
242    }
243    return FALSE;
244}
245
246static UBool
247_isScriptSubtag(const char* s, int32_t len) {
248    /*
249     * script        = 4ALPHA              ; ISO 15924 code
250     */
251    if (len < 0) {
252        len = (int32_t)uprv_strlen(s);
253    }
254    if (len == 4 && _isAlphaString(s, len)) {
255        return TRUE;
256    }
257    return FALSE;
258}
259
260static UBool
261_isRegionSubtag(const char* s, int32_t len) {
262    /*
263     * region        = 2ALPHA              ; ISO 3166-1 code
264     *               / 3DIGIT              ; UN M.49 code
265     */
266    if (len < 0) {
267        len = (int32_t)uprv_strlen(s);
268    }
269    if (len == 2 && _isAlphaString(s, len)) {
270        return TRUE;
271    }
272    if (len == 3 && _isNumericString(s, len)) {
273        return TRUE;
274    }
275    return FALSE;
276}
277
278static UBool
279_isVariantSubtag(const char* s, int32_t len) {
280    /*
281     * variant       = 5*8alphanum         ; registered variants
282     *               / (DIGIT 3alphanum)
283     */
284    if (len < 0) {
285        len = (int32_t)uprv_strlen(s);
286    }
287    if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
288        return TRUE;
289    }
290    if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
291        return TRUE;
292    }
293    return FALSE;
294}
295
296static UBool
297_isPrivateuseVariantSubtag(const char* s, int32_t len) {
298    /*
299     * variant       = 1*8alphanum         ; registered variants
300     *               / (DIGIT 3alphanum)
301     */
302    if (len < 0) {
303        len = (int32_t)uprv_strlen(s);
304    }
305    if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
306        return TRUE;
307    }
308    return FALSE;
309}
310
311static UBool
312_isExtensionSingleton(const char* s, int32_t len) {
313    /*
314     * extension     = singleton 1*("-" (2*8alphanum))
315     */
316    if (len < 0) {
317        len = (int32_t)uprv_strlen(s);
318    }
319    if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
320        return TRUE;
321    }
322    return FALSE;
323}
324
325static UBool
326_isExtensionSubtag(const char* s, int32_t len) {
327    /*
328     * extension     = singleton 1*("-" (2*8alphanum))
329     */
330    if (len < 0) {
331        len = (int32_t)uprv_strlen(s);
332    }
333    if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
334        return TRUE;
335    }
336    return FALSE;
337}
338
339static UBool
340_isExtensionSubtags(const char* s, int32_t len) {
341    const char *p = s;
342    const char *pSubtag = NULL;
343
344    if (len < 0) {
345        len = (int32_t)uprv_strlen(s);
346    }
347
348    while ((p - s) < len) {
349        if (*p == SEP) {
350            if (pSubtag == NULL) {
351                return FALSE;
352            }
353            if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
354                return FALSE;
355            }
356            pSubtag = NULL;
357        } else if (pSubtag == NULL) {
358            pSubtag = p;
359        }
360        p++;
361    }
362    if (pSubtag == NULL) {
363        return FALSE;
364    }
365    return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
366}
367
368static UBool
369_isPrivateuseValueSubtag(const char* s, int32_t len) {
370    /*
371     * privateuse    = "x" 1*("-" (1*8alphanum))
372     */
373    if (len < 0) {
374        len = (int32_t)uprv_strlen(s);
375    }
376    if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
377        return TRUE;
378    }
379    return FALSE;
380}
381
382static UBool
383_isPrivateuseValueSubtags(const char* s, int32_t len) {
384    const char *p = s;
385    const char *pSubtag = NULL;
386
387    if (len < 0) {
388        len = (int32_t)uprv_strlen(s);
389    }
390
391    while ((p - s) < len) {
392        if (*p == SEP) {
393            if (pSubtag == NULL) {
394                return FALSE;
395            }
396            if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
397                return FALSE;
398            }
399            pSubtag = NULL;
400        } else if (pSubtag == NULL) {
401            pSubtag = p;
402        }
403        p++;
404    }
405    if (pSubtag == NULL) {
406        return FALSE;
407    }
408    return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
409}
410
411U_CFUNC UBool
412ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
413    if (len < 0) {
414        len = (int32_t)uprv_strlen(s);
415    }
416    if (len == 2 && _isAlphaNumericString(s, len)) {
417        return TRUE;
418    }
419    return FALSE;
420}
421
422U_CFUNC UBool
423ultag_isUnicodeLocaleType(const char*s, int32_t len) {
424    const char* p;
425    int32_t subtagLen = 0;
426
427    if (len < 0) {
428        len = (int32_t)uprv_strlen(s);
429    }
430
431    for (p = s; len > 0; p++, len--) {
432        if (*p == SEP) {
433            if (subtagLen < 3) {
434                return FALSE;
435            }
436            subtagLen = 0;
437        } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
438            subtagLen++;
439            if (subtagLen > 8) {
440                return FALSE;
441            }
442        } else {
443            return FALSE;
444        }
445    }
446
447    return (subtagLen >= 3);
448}
449/*
450* -------------------------------------------------
451*
452* Helper functions
453*
454* -------------------------------------------------
455*/
456
457static UBool
458_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
459    UBool bAdded = TRUE;
460
461    if (*first == NULL) {
462        var->next = NULL;
463        *first = var;
464    } else {
465        VariantListEntry *prev, *cur;
466        int32_t cmp;
467
468        /* variants order should be preserved */
469        prev = NULL;
470        cur = *first;
471        while (TRUE) {
472            if (cur == NULL) {
473                prev->next = var;
474                var->next = NULL;
475                break;
476            }
477
478            /* Checking for duplicate variant */
479            cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
480            if (cmp == 0) {
481                /* duplicated variant */
482                bAdded = FALSE;
483                break;
484            }
485            prev = cur;
486            cur = cur->next;
487        }
488    }
489
490    return bAdded;
491}
492
493static UBool
494_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
495    UBool bAdded = TRUE;
496
497    if (*first == NULL) {
498        attr->next = NULL;
499        *first = attr;
500    } else {
501        AttributeListEntry *prev, *cur;
502        int32_t cmp;
503
504        /* reorder variants in alphabetical order */
505        prev = NULL;
506        cur = *first;
507        while (TRUE) {
508            if (cur == NULL) {
509                prev->next = attr;
510                attr->next = NULL;
511                break;
512            }
513            cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
514            if (cmp < 0) {
515                if (prev == NULL) {
516                    *first = attr;
517                } else {
518                    prev->next = attr;
519                }
520                attr->next = cur;
521                break;
522            }
523            if (cmp == 0) {
524                /* duplicated variant */
525                bAdded = FALSE;
526                break;
527            }
528            prev = cur;
529            cur = cur->next;
530        }
531    }
532
533    return bAdded;
534}
535
536
537static UBool
538_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
539    UBool bAdded = TRUE;
540
541    if (*first == NULL) {
542        ext->next = NULL;
543        *first = ext;
544    } else {
545        ExtensionListEntry *prev, *cur;
546        int32_t cmp;
547
548        /* reorder variants in alphabetical order */
549        prev = NULL;
550        cur = *first;
551        while (TRUE) {
552            if (cur == NULL) {
553                prev->next = ext;
554                ext->next = NULL;
555                break;
556            }
557            if (localeToBCP) {
558                /* special handling for locale to bcp conversion */
559                int32_t len, curlen;
560
561                len = (int32_t)uprv_strlen(ext->key);
562                curlen = (int32_t)uprv_strlen(cur->key);
563
564                if (len == 1 && curlen == 1) {
565                    if (*(ext->key) == *(cur->key)) {
566                        cmp = 0;
567                    } else if (*(ext->key) == PRIVATEUSE) {
568                        cmp = 1;
569                    } else if (*(cur->key) == PRIVATEUSE) {
570                        cmp = -1;
571                    } else {
572                        cmp = *(ext->key) - *(cur->key);
573                    }
574                } else if (len == 1) {
575                    cmp = *(ext->key) - LDMLEXT;
576                } else if (curlen == 1) {
577                    cmp = LDMLEXT - *(cur->key);
578                } else {
579                    cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
580                }
581            } else {
582                cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
583            }
584            if (cmp < 0) {
585                if (prev == NULL) {
586                    *first = ext;
587                } else {
588                    prev->next = ext;
589                }
590                ext->next = cur;
591                break;
592            }
593            if (cmp == 0) {
594                /* duplicated extension key */
595                bAdded = FALSE;
596                break;
597            }
598            prev = cur;
599            cur = cur->next;
600        }
601    }
602
603    return bAdded;
604}
605
606static void
607_initializeULanguageTag(ULanguageTag* langtag) {
608    int32_t i;
609
610    langtag->buf = NULL;
611
612    langtag->language = EMPTY;
613    for (i = 0; i < MAXEXTLANG; i++) {
614        langtag->extlang[i] = NULL;
615    }
616
617    langtag->script = EMPTY;
618    langtag->region = EMPTY;
619
620    langtag->variants = NULL;
621    langtag->extensions = NULL;
622
623    langtag->grandfathered = EMPTY;
624    langtag->privateuse = EMPTY;
625}
626
627static int32_t
628_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
629    char buf[ULOC_LANG_CAPACITY];
630    UErrorCode tmpStatus = U_ZERO_ERROR;
631    int32_t len, i;
632    int32_t reslen = 0;
633
634    if (U_FAILURE(*status)) {
635        return 0;
636    }
637
638    len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
639    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
640        if (strict) {
641            *status = U_ILLEGAL_ARGUMENT_ERROR;
642            return 0;
643        }
644        len = 0;
645    }
646
647    /* Note: returned language code is in lower case letters */
648
649    if (len == 0) {
650        if (reslen < capacity) {
651            uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
652        }
653        reslen += LANG_UND_LEN;
654    } else if (!_isLanguageSubtag(buf, len)) {
655            /* invalid language code */
656        if (strict) {
657            *status = U_ILLEGAL_ARGUMENT_ERROR;
658            return 0;
659        }
660        if (reslen < capacity) {
661            uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
662        }
663        reslen += LANG_UND_LEN;
664    } else {
665        /* resolve deprecated */
666        for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
667            if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
668                uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
669                len = (int32_t)uprv_strlen(buf);
670                break;
671            }
672        }
673        if (reslen < capacity) {
674            uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
675        }
676        reslen += len;
677    }
678    u_terminateChars(appendAt, capacity, reslen, status);
679    return reslen;
680}
681
682static int32_t
683_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
684    char buf[ULOC_SCRIPT_CAPACITY];
685    UErrorCode tmpStatus = U_ZERO_ERROR;
686    int32_t len;
687    int32_t reslen = 0;
688
689    if (U_FAILURE(*status)) {
690        return 0;
691    }
692
693    len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
694    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
695        if (strict) {
696            *status = U_ILLEGAL_ARGUMENT_ERROR;
697        }
698        return 0;
699    }
700
701    if (len > 0) {
702        if (!_isScriptSubtag(buf, len)) {
703            /* invalid script code */
704            if (strict) {
705                *status = U_ILLEGAL_ARGUMENT_ERROR;
706            }
707            return 0;
708        } else {
709            if (reslen < capacity) {
710                *(appendAt + reslen) = SEP;
711            }
712            reslen++;
713
714            if (reslen < capacity) {
715                uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
716            }
717            reslen += len;
718        }
719    }
720    u_terminateChars(appendAt, capacity, reslen, status);
721    return reslen;
722}
723
724static int32_t
725_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
726    char buf[ULOC_COUNTRY_CAPACITY];
727    UErrorCode tmpStatus = U_ZERO_ERROR;
728    int32_t len;
729    int32_t reslen = 0;
730
731    if (U_FAILURE(*status)) {
732        return 0;
733    }
734
735    len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
736    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
737        if (strict) {
738            *status = U_ILLEGAL_ARGUMENT_ERROR;
739        }
740        return 0;
741    }
742
743    if (len > 0) {
744        if (!_isRegionSubtag(buf, len)) {
745            /* invalid region code */
746            if (strict) {
747                *status = U_ILLEGAL_ARGUMENT_ERROR;
748            }
749            return 0;
750        } else {
751            if (reslen < capacity) {
752                *(appendAt + reslen) = SEP;
753            }
754            reslen++;
755
756            if (reslen < capacity) {
757                uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
758            }
759            reslen += len;
760        }
761    }
762    u_terminateChars(appendAt, capacity, reslen, status);
763    return reslen;
764}
765
766static int32_t
767_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
768    char buf[ULOC_FULLNAME_CAPACITY];
769    UErrorCode tmpStatus = U_ZERO_ERROR;
770    int32_t len, i;
771    int32_t reslen = 0;
772
773    if (U_FAILURE(*status)) {
774        return 0;
775    }
776
777    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
778    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
779        if (strict) {
780            *status = U_ILLEGAL_ARGUMENT_ERROR;
781        }
782        return 0;
783    }
784
785    if (len > 0) {
786        char *p, *pVar;
787        UBool bNext = TRUE;
788        VariantListEntry *var;
789        VariantListEntry *varFirst = NULL;
790
791        pVar = NULL;
792        p = buf;
793        while (bNext) {
794            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
795                if (*p == 0) {
796                    bNext = FALSE;
797                } else {
798                    *p = 0; /* terminate */
799                }
800                if (pVar == NULL) {
801                    if (strict) {
802                        *status = U_ILLEGAL_ARGUMENT_ERROR;
803                        break;
804                    }
805                    /* ignore empty variant */
806                } else {
807                    /* ICU uses upper case letters for variants, but
808                       the canonical format is lowercase in BCP47 */
809                    for (i = 0; *(pVar + i) != 0; i++) {
810                        *(pVar + i) = uprv_tolower(*(pVar + i));
811                    }
812
813                    /* validate */
814                    if (_isVariantSubtag(pVar, -1)) {
815                        if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
816                            /* emit the variant to the list */
817                            var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
818                            if (var == NULL) {
819                                *status = U_MEMORY_ALLOCATION_ERROR;
820                                break;
821                            }
822                            var->variant = pVar;
823                            if (!_addVariantToList(&varFirst, var)) {
824                                /* duplicated variant */
825                                uprv_free(var);
826                                if (strict) {
827                                    *status = U_ILLEGAL_ARGUMENT_ERROR;
828                                    break;
829                                }
830                            }
831                        } else {
832                            /* Special handling for POSIX variant, need to remember that we had it and then */
833                            /* treat it like an extension later. */
834                            *hadPosix = TRUE;
835                        }
836                    } else if (strict) {
837                        *status = U_ILLEGAL_ARGUMENT_ERROR;
838                        break;
839                    } else if (_isPrivateuseValueSubtag(pVar, -1)) {
840                        /* Handle private use subtags separately */
841                        break;
842                    }
843                }
844                /* reset variant starting position */
845                pVar = NULL;
846            } else if (pVar == NULL) {
847                pVar = p;
848            }
849            p++;
850        }
851
852        if (U_SUCCESS(*status)) {
853            if (varFirst != NULL) {
854                int32_t varLen;
855
856                /* write out validated/normalized variants to the target */
857                var = varFirst;
858                while (var != NULL) {
859                    if (reslen < capacity) {
860                        *(appendAt + reslen) = SEP;
861                    }
862                    reslen++;
863                    varLen = (int32_t)uprv_strlen(var->variant);
864                    if (reslen < capacity) {
865                        uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
866                    }
867                    reslen += varLen;
868                    var = var->next;
869                }
870            }
871        }
872
873        /* clean up */
874        var = varFirst;
875        while (var != NULL) {
876            VariantListEntry *tmpVar = var->next;
877            uprv_free(var);
878            var = tmpVar;
879        }
880
881        if (U_FAILURE(*status)) {
882            return 0;
883        }
884    }
885
886    u_terminateChars(appendAt, capacity, reslen, status);
887    return reslen;
888}
889
890static int32_t
891_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
892    char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
893    char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
894    int32_t attrBufLength = 0;
895    UBool isAttribute = FALSE;
896    UEnumeration *keywordEnum = NULL;
897    int32_t reslen = 0;
898
899    keywordEnum = uloc_openKeywords(localeID, status);
900    if (U_FAILURE(*status) && !hadPosix) {
901        uenum_close(keywordEnum);
902        return 0;
903    }
904    if (keywordEnum != NULL || hadPosix) {
905        /* reorder extensions */
906        int32_t len;
907        const char *key;
908        ExtensionListEntry *firstExt = NULL;
909        ExtensionListEntry *ext;
910        AttributeListEntry *firstAttr = NULL;
911        AttributeListEntry *attr;
912        char *attrValue;
913        char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
914        char *pExtBuf = extBuf;
915        int32_t extBufCapacity = sizeof(extBuf);
916        const char *bcpKey, *bcpValue;
917        UErrorCode tmpStatus = U_ZERO_ERROR;
918        int32_t keylen;
919        UBool isBcpUExt;
920
921        while (TRUE) {
922            isAttribute = FALSE;
923            key = uenum_next(keywordEnum, NULL, status);
924            if (key == NULL) {
925                break;
926            }
927            len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
928            /* buf must be null-terminated */
929            if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
930                if (strict) {
931                    *status = U_ILLEGAL_ARGUMENT_ERROR;
932                    break;
933                }
934                /* ignore this keyword */
935                tmpStatus = U_ZERO_ERROR;
936                continue;
937            }
938
939            keylen = (int32_t)uprv_strlen(key);
940            isBcpUExt = (keylen > 1);
941
942            /* special keyword used for representing Unicode locale attributes */
943            if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
944                isAttribute = TRUE;
945                if (len > 0) {
946                    int32_t i = 0;
947                    while (TRUE) {
948                        attrBufLength = 0;
949                        for (; i < len; i++) {
950                            if (buf[i] != '-') {
951                                attrBuf[attrBufLength++] = buf[i];
952                            } else {
953                                i++;
954                                break;
955                            }
956                        }
957                        if (attrBufLength > 0) {
958                            attrBuf[attrBufLength] = 0;
959
960                        } else if (i >= len){
961                            break;
962                        }
963
964                        /* create AttributeListEntry */
965                        attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
966                        if (attr == NULL) {
967                            *status = U_MEMORY_ALLOCATION_ERROR;
968                            break;
969                        }
970                        attrValue = (char*)uprv_malloc(attrBufLength + 1);
971                        if (attrValue == NULL) {
972                            *status = U_MEMORY_ALLOCATION_ERROR;
973                            break;
974                        }
975                        uprv_strcpy(attrValue, attrBuf);
976                        attr->attribute = attrValue;
977
978                        if (!_addAttributeToList(&firstAttr, attr)) {
979                            uprv_free(attr);
980                            uprv_free(attrValue);
981                            if (strict) {
982                                *status = U_ILLEGAL_ARGUMENT_ERROR;
983                                break;
984                            }
985                        }
986                    }
987                }
988            } else if (isBcpUExt) {
989                bcpKey = uloc_toUnicodeLocaleKey(key);
990                if (bcpKey == NULL) {
991                    if (strict) {
992                        *status = U_ILLEGAL_ARGUMENT_ERROR;
993                        break;
994                    }
995                    continue;
996                }
997
998                /* we've checked buf is null-terminated above */
999                bcpValue = uloc_toUnicodeLocaleType(key, buf);
1000                if (bcpValue == NULL) {
1001                    if (strict) {
1002                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1003                        break;
1004                    }
1005                    continue;
1006                }
1007                if (bcpValue == buf) {
1008                    /*
1009                    When uloc_toUnicodeLocaleType(key, buf) returns the
1010                    input value as is, the value is well-formed, but has
1011                    no known mapping. This implementation normalizes the
1012                    the value to lower case
1013                    */
1014                    int32_t bcpValueLen = uprv_strlen(bcpValue);
1015                    if (bcpValueLen < extBufCapacity) {
1016                        uprv_strcpy(pExtBuf, bcpValue);
1017                        T_CString_toLowerCase(pExtBuf);
1018
1019                        bcpValue = pExtBuf;
1020
1021                        pExtBuf += (bcpValueLen + 1);
1022                        extBufCapacity -= (bcpValueLen + 1);
1023                    } else {
1024                        if (strict) {
1025                            *status = U_ILLEGAL_ARGUMENT_ERROR;
1026                            break;
1027                        }
1028                        continue;
1029                    }
1030                }
1031            } else {
1032                if (*key == PRIVATEUSE) {
1033                    if (!_isPrivateuseValueSubtags(buf, len)) {
1034                        if (strict) {
1035                            *status = U_ILLEGAL_ARGUMENT_ERROR;
1036                            break;
1037                        }
1038                        continue;
1039                    }
1040                } else {
1041                    if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1042                        if (strict) {
1043                            *status = U_ILLEGAL_ARGUMENT_ERROR;
1044                            break;
1045                        }
1046                        continue;
1047                    }
1048                }
1049                bcpKey = key;
1050                if ((len + 1) < extBufCapacity) {
1051                    uprv_memcpy(pExtBuf, buf, len);
1052                    bcpValue = pExtBuf;
1053
1054                    pExtBuf += len;
1055
1056                    *pExtBuf = 0;
1057                    pExtBuf++;
1058
1059                    extBufCapacity -= (len + 1);
1060                } else {
1061                    *status = U_ILLEGAL_ARGUMENT_ERROR;
1062                    break;
1063                }
1064            }
1065
1066            if (!isAttribute) {
1067                /* create ExtensionListEntry */
1068                ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1069                if (ext == NULL) {
1070                    *status = U_MEMORY_ALLOCATION_ERROR;
1071                    break;
1072                }
1073                ext->key = bcpKey;
1074                ext->value = bcpValue;
1075
1076                if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1077                    uprv_free(ext);
1078                    if (strict) {
1079                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1080                        break;
1081                    }
1082                }
1083            }
1084        }
1085
1086        /* Special handling for POSIX variant - add the keywords for POSIX */
1087        if (hadPosix) {
1088            /* create ExtensionListEntry for POSIX */
1089            ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1090            if (ext == NULL) {
1091                *status = U_MEMORY_ALLOCATION_ERROR;
1092                goto cleanup;
1093            }
1094            ext->key = POSIX_KEY;
1095            ext->value = POSIX_VALUE;
1096
1097            if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1098                uprv_free(ext);
1099            }
1100        }
1101
1102        if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1103            UBool startLDMLExtension = FALSE;
1104
1105            attr = firstAttr;
1106            ext = firstExt;
1107            do {
1108                if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1109                   /* write LDML singleton extension */
1110                   if (reslen < capacity) {
1111                       *(appendAt + reslen) = SEP;
1112                   }
1113                   reslen++;
1114                   if (reslen < capacity) {
1115                       *(appendAt + reslen) = LDMLEXT;
1116                   }
1117                   reslen++;
1118
1119                   startLDMLExtension = TRUE;
1120                }
1121
1122                /* write out the sorted BCP47 attributes, extensions and private use */
1123                if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1124                    if (reslen < capacity) {
1125                        *(appendAt + reslen) = SEP;
1126                    }
1127                    reslen++;
1128                    len = (int32_t)uprv_strlen(ext->key);
1129                    if (reslen < capacity) {
1130                        uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1131                    }
1132                    reslen += len;
1133                    if (reslen < capacity) {
1134                        *(appendAt + reslen) = SEP;
1135                    }
1136                    reslen++;
1137                    len = (int32_t)uprv_strlen(ext->value);
1138                    if (reslen < capacity) {
1139                        uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1140                    }
1141                    reslen += len;
1142
1143                    ext = ext->next;
1144                } else if (attr) {
1145                    /* write the value for the attributes */
1146                    if (reslen < capacity) {
1147                        *(appendAt + reslen) = SEP;
1148                    }
1149                    reslen++;
1150                    len = (int32_t)uprv_strlen(attr->attribute);
1151                    if (reslen < capacity) {
1152                        uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1153                    }
1154                    reslen += len;
1155
1156                    attr = attr->next;
1157                }
1158            } while (attr != NULL || ext != NULL);
1159        }
1160cleanup:
1161        /* clean up */
1162        ext = firstExt;
1163        while (ext != NULL) {
1164            ExtensionListEntry *tmpExt = ext->next;
1165            uprv_free(ext);
1166            ext = tmpExt;
1167        }
1168
1169        attr = firstAttr;
1170        while (attr != NULL) {
1171            AttributeListEntry *tmpAttr = attr->next;
1172            char *pValue = (char *)attr->attribute;
1173            uprv_free(pValue);
1174            uprv_free(attr);
1175            attr = tmpAttr;
1176        }
1177
1178        uenum_close(keywordEnum);
1179
1180        if (U_FAILURE(*status)) {
1181            return 0;
1182        }
1183    }
1184
1185    return u_terminateChars(appendAt, capacity, reslen, status);
1186}
1187
1188/**
1189 * Append keywords parsed from LDML extension value
1190 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1191 * Note: char* buf is used for storing keywords
1192 */
1193static void
1194_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1195    const char *pTag;   /* beginning of current subtag */
1196    const char *pKwds;  /* beginning of key-type pairs */
1197    UBool variantExists = *posixVariant;
1198
1199    ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
1200    ExtensionListEntry *kwd, *nextKwd;
1201
1202    AttributeListEntry *attrFirst = NULL;   /* first attribute */
1203    AttributeListEntry *attr, *nextAttr;
1204
1205    int32_t len;
1206    int32_t bufIdx = 0;
1207
1208    char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1209    int32_t attrBufIdx = 0;
1210
1211    /* Reset the posixVariant value */
1212    *posixVariant = FALSE;
1213
1214    pTag = ldmlext;
1215    pKwds = NULL;
1216
1217    /* Iterate through u extension attributes */
1218    while (*pTag) {
1219        /* locate next separator char */
1220        for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1221
1222        if (ultag_isUnicodeLocaleKey(pTag, len)) {
1223            pKwds = pTag;
1224            break;
1225        }
1226
1227        /* add this attribute to the list */
1228        attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1229        if (attr == NULL) {
1230            *status = U_MEMORY_ALLOCATION_ERROR;
1231            goto cleanup;
1232        }
1233
1234        if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1235            uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1236            attrBuf[attrBufIdx + len] = 0;
1237            attr->attribute = &attrBuf[attrBufIdx];
1238            attrBufIdx += (len + 1);
1239        } else {
1240            *status = U_ILLEGAL_ARGUMENT_ERROR;
1241            goto cleanup;
1242        }
1243
1244        if (!_addAttributeToList(&attrFirst, attr)) {
1245            *status = U_ILLEGAL_ARGUMENT_ERROR;
1246            uprv_free(attr);
1247            goto cleanup;
1248        }
1249
1250        /* next tag */
1251        pTag += len;
1252        if (*pTag) {
1253            /* next to the separator */
1254            pTag++;
1255        }
1256    }
1257
1258    if (attrFirst) {
1259        /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1260
1261        if (attrBufIdx > bufSize) {
1262            /* attrBufIdx == <total length of attribute subtag> + 1 */
1263            *status = U_ILLEGAL_ARGUMENT_ERROR;
1264            goto cleanup;
1265        }
1266
1267        kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1268        if (kwd == NULL) {
1269            *status = U_MEMORY_ALLOCATION_ERROR;
1270            goto cleanup;
1271        }
1272
1273        kwd->key = LOCALE_ATTRIBUTE_KEY;
1274        kwd->value = buf;
1275
1276        /* attribute subtags sorted in alphabetical order as type */
1277        attr = attrFirst;
1278        while (attr != NULL) {
1279            nextAttr = attr->next;
1280
1281            /* buffer size check is done above */
1282            if (attr != attrFirst) {
1283                *(buf + bufIdx) = SEP;
1284                bufIdx++;
1285            }
1286
1287            len = uprv_strlen(attr->attribute);
1288            uprv_memcpy(buf + bufIdx, attr->attribute, len);
1289            bufIdx += len;
1290
1291            attr = nextAttr;
1292        }
1293        *(buf + bufIdx) = 0;
1294        bufIdx++;
1295
1296        if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1297            *status = U_ILLEGAL_ARGUMENT_ERROR;
1298            uprv_free(kwd);
1299            goto cleanup;
1300        }
1301
1302        /* once keyword entry is created, delete the attribute list */
1303        attr = attrFirst;
1304        while (attr != NULL) {
1305            nextAttr = attr->next;
1306            uprv_free(attr);
1307            attr = nextAttr;
1308        }
1309        attrFirst = NULL;
1310    }
1311
1312    if (pKwds) {
1313        const char *pBcpKey = NULL;     /* u extenstion key subtag */
1314        const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
1315        int32_t bcpKeyLen = 0;
1316        int32_t bcpTypeLen = 0;
1317        UBool isDone = FALSE;
1318
1319        pTag = pKwds;
1320        /* BCP47 representation of LDML key/type pairs */
1321        while (!isDone) {
1322            const char *pNextBcpKey = NULL;
1323            int32_t nextBcpKeyLen = 0;
1324            UBool emitKeyword = FALSE;
1325
1326            if (*pTag) {
1327                /* locate next separator char */
1328                for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1329
1330                if (ultag_isUnicodeLocaleKey(pTag, len)) {
1331                    if (pBcpKey) {
1332                        emitKeyword = TRUE;
1333                        pNextBcpKey = pTag;
1334                        nextBcpKeyLen = len;
1335                    } else {
1336                        pBcpKey = pTag;
1337                        bcpKeyLen = len;
1338                    }
1339                } else {
1340                    U_ASSERT(pBcpKey != NULL);
1341                    /* within LDML type subtags */
1342                    if (pBcpType) {
1343                        bcpTypeLen += (len + 1);
1344                    } else {
1345                        pBcpType = pTag;
1346                        bcpTypeLen = len;
1347                    }
1348                }
1349
1350                /* next tag */
1351                pTag += len;
1352                if (*pTag) {
1353                    /* next to the separator */
1354                    pTag++;
1355                }
1356            } else {
1357                /* processing last one */
1358                emitKeyword = TRUE;
1359                isDone = TRUE;
1360            }
1361
1362            if (emitKeyword) {
1363                const char *pKey = NULL;    /* LDML key */
1364                const char *pType = NULL;   /* LDML type */
1365
1366                char bcpKeyBuf[9];          /* BCP key length is always 2 for now */
1367
1368                U_ASSERT(pBcpKey != NULL);
1369
1370                if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
1371                    /* the BCP key is invalid */
1372                    *status = U_ILLEGAL_ARGUMENT_ERROR;
1373                    goto cleanup;
1374                }
1375
1376                uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1377                bcpKeyBuf[bcpKeyLen] = 0;
1378
1379                /* u extension key to LDML key */
1380                pKey = uloc_toLegacyKey(bcpKeyBuf);
1381                if (pKey == NULL) {
1382                    *status = U_ILLEGAL_ARGUMENT_ERROR;
1383                    goto cleanup;
1384                }
1385                if (pKey == bcpKeyBuf) {
1386                    /*
1387                    The key returned by toLegacyKey points to the input buffer.
1388                    We normalize the result key to lower case.
1389                    */
1390                    T_CString_toLowerCase(bcpKeyBuf);
1391                    if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1392                        uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1393                        pKey = buf + bufIdx;
1394                        bufIdx += bcpKeyLen;
1395                        *(buf + bufIdx) = 0;
1396                        bufIdx++;
1397                    } else {
1398                        *status = U_BUFFER_OVERFLOW_ERROR;
1399                        goto cleanup;
1400                    }
1401                }
1402
1403                if (pBcpType) {
1404                    char bcpTypeBuf[128];       /* practically long enough even considering multiple subtag type */
1405                    if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
1406                        /* the BCP type is too long */
1407                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1408                        goto cleanup;
1409                    }
1410
1411                    uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1412                    bcpTypeBuf[bcpTypeLen] = 0;
1413
1414                    /* BCP type to locale type */
1415                    pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1416                    if (pType == NULL) {
1417                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1418                        goto cleanup;
1419                    }
1420                    if (pType == bcpTypeBuf) {
1421                        /*
1422                        The type returned by toLegacyType points to the input buffer.
1423                        We normalize the result type to lower case.
1424                        */
1425                        /* normalize to lower case */
1426                        T_CString_toLowerCase(bcpTypeBuf);
1427                        if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1428                            uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1429                            pType = buf + bufIdx;
1430                            bufIdx += bcpTypeLen;
1431                            *(buf + bufIdx) = 0;
1432                            bufIdx++;
1433                        } else {
1434                            *status = U_BUFFER_OVERFLOW_ERROR;
1435                            goto cleanup;
1436                        }
1437                    }
1438                } else {
1439                    /* typeless - default type value is "yes" */
1440                    pType = LOCALE_TYPE_YES;
1441                }
1442
1443                /* Special handling for u-va-posix, since we want to treat this as a variant,
1444                   not as a keyword */
1445                if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1446                    *posixVariant = TRUE;
1447                } else {
1448                    /* create an ExtensionListEntry for this keyword */
1449                    kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1450                    if (kwd == NULL) {
1451                        *status = U_MEMORY_ALLOCATION_ERROR;
1452                        goto cleanup;
1453                    }
1454
1455                    kwd->key = pKey;
1456                    kwd->value = pType;
1457
1458                    if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1459                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1460                        uprv_free(kwd);
1461                        goto cleanup;
1462                    }
1463                }
1464
1465                pBcpKey = pNextBcpKey;
1466                bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1467                pBcpType = NULL;
1468                bcpTypeLen = 0;
1469            }
1470        }
1471    }
1472
1473    kwd = kwdFirst;
1474    while (kwd != NULL) {
1475        nextKwd = kwd->next;
1476        _addExtensionToList(appendTo, kwd, FALSE);
1477        kwd = nextKwd;
1478    }
1479
1480    return;
1481
1482cleanup:
1483    attr = attrFirst;
1484    while (attr != NULL) {
1485        nextAttr = attr->next;
1486        uprv_free(attr);
1487        attr = nextAttr;
1488    }
1489
1490    kwd = kwdFirst;
1491    while (kwd != NULL) {
1492        nextKwd = kwd->next;
1493        uprv_free(kwd);
1494        kwd = nextKwd;
1495    }
1496}
1497
1498
1499static int32_t
1500_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1501    int32_t reslen = 0;
1502    int32_t i, n;
1503    int32_t len;
1504    ExtensionListEntry *kwdFirst = NULL;
1505    ExtensionListEntry *kwd;
1506    const char *key, *type;
1507    char *kwdBuf = NULL;
1508    int32_t kwdBufLength = capacity;
1509    UBool posixVariant = FALSE;
1510
1511    if (U_FAILURE(*status)) {
1512        return 0;
1513    }
1514
1515    kwdBuf = (char*)uprv_malloc(kwdBufLength);
1516    if (kwdBuf == NULL) {
1517        *status = U_MEMORY_ALLOCATION_ERROR;
1518        return 0;
1519    }
1520
1521    /* Determine if variants already exists */
1522    if (ultag_getVariantsSize(langtag)) {
1523        posixVariant = TRUE;
1524    }
1525
1526    n = ultag_getExtensionsSize(langtag);
1527
1528    /* resolve locale keywords and reordering keys */
1529    for (i = 0; i < n; i++) {
1530        key = ultag_getExtensionKey(langtag, i);
1531        type = ultag_getExtensionValue(langtag, i);
1532        if (*key == LDMLEXT) {
1533            _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1534            if (U_FAILURE(*status)) {
1535                break;
1536            }
1537        } else {
1538            kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1539            if (kwd == NULL) {
1540                *status = U_MEMORY_ALLOCATION_ERROR;
1541                break;
1542            }
1543            kwd->key = key;
1544            kwd->value = type;
1545            if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1546                uprv_free(kwd);
1547                *status = U_ILLEGAL_ARGUMENT_ERROR;
1548                break;
1549            }
1550        }
1551    }
1552
1553    if (U_SUCCESS(*status)) {
1554        type = ultag_getPrivateUse(langtag);
1555        if ((int32_t)uprv_strlen(type) > 0) {
1556            /* add private use as a keyword */
1557            kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1558            if (kwd == NULL) {
1559                *status = U_MEMORY_ALLOCATION_ERROR;
1560            } else {
1561                kwd->key = PRIVATEUSE_KEY;
1562                kwd->value = type;
1563                if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1564                    uprv_free(kwd);
1565                    *status = U_ILLEGAL_ARGUMENT_ERROR;
1566                }
1567            }
1568        }
1569    }
1570
1571    /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1572
1573    if (U_SUCCESS(*status) && posixVariant) {
1574        len = (int32_t) uprv_strlen(_POSIX);
1575        if (reslen < capacity) {
1576            uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1577        }
1578        reslen += len;
1579    }
1580
1581    if (U_SUCCESS(*status) && kwdFirst != NULL) {
1582        /* write out the sorted keywords */
1583        UBool firstValue = TRUE;
1584        kwd = kwdFirst;
1585        do {
1586            if (reslen < capacity) {
1587                if (firstValue) {
1588                    /* '@' */
1589                    *(appendAt + reslen) = LOCALE_EXT_SEP;
1590                    firstValue = FALSE;
1591                } else {
1592                    /* ';' */
1593                    *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1594                }
1595            }
1596            reslen++;
1597
1598            /* key */
1599            len = (int32_t)uprv_strlen(kwd->key);
1600            if (reslen < capacity) {
1601                uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1602            }
1603            reslen += len;
1604
1605            /* '=' */
1606            if (reslen < capacity) {
1607                *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1608            }
1609            reslen++;
1610
1611            /* type */
1612            len = (int32_t)uprv_strlen(kwd->value);
1613            if (reslen < capacity) {
1614                uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1615            }
1616            reslen += len;
1617
1618            kwd = kwd->next;
1619        } while (kwd);
1620    }
1621
1622    /* clean up */
1623    kwd = kwdFirst;
1624    while (kwd != NULL) {
1625        ExtensionListEntry *tmpKwd = kwd->next;
1626        uprv_free(kwd);
1627        kwd = tmpKwd;
1628    }
1629
1630    uprv_free(kwdBuf);
1631
1632    if (U_FAILURE(*status)) {
1633        return 0;
1634    }
1635
1636    return u_terminateChars(appendAt, capacity, reslen, status);
1637}
1638
1639static int32_t
1640_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1641    char buf[ULOC_FULLNAME_CAPACITY];
1642    char tmpAppend[ULOC_FULLNAME_CAPACITY];
1643    UErrorCode tmpStatus = U_ZERO_ERROR;
1644    int32_t len, i;
1645    int32_t reslen = 0;
1646
1647    if (U_FAILURE(*status)) {
1648        return 0;
1649    }
1650
1651    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1652    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1653        if (strict) {
1654            *status = U_ILLEGAL_ARGUMENT_ERROR;
1655        }
1656        return 0;
1657    }
1658
1659    if (len > 0) {
1660        char *p, *pPriv;
1661        UBool bNext = TRUE;
1662        UBool firstValue = TRUE;
1663        UBool writeValue;
1664
1665        pPriv = NULL;
1666        p = buf;
1667        while (bNext) {
1668            writeValue = FALSE;
1669            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1670                if (*p == 0) {
1671                    bNext = FALSE;
1672                } else {
1673                    *p = 0; /* terminate */
1674                }
1675                if (pPriv != NULL) {
1676                    /* Private use in the canonical format is lowercase in BCP47 */
1677                    for (i = 0; *(pPriv + i) != 0; i++) {
1678                        *(pPriv + i) = uprv_tolower(*(pPriv + i));
1679                    }
1680
1681                    /* validate */
1682                    if (_isPrivateuseValueSubtag(pPriv, -1)) {
1683                        if (firstValue) {
1684                            if (!_isVariantSubtag(pPriv, -1)) {
1685                                writeValue = TRUE;
1686                            }
1687                        } else {
1688                            writeValue = TRUE;
1689                        }
1690                    } else if (strict) {
1691                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1692                        break;
1693                    } else {
1694                        break;
1695                    }
1696
1697                    if (writeValue) {
1698                        if (reslen < capacity) {
1699                            tmpAppend[reslen++] = SEP;
1700                        }
1701
1702                        if (firstValue) {
1703                            if (reslen < capacity) {
1704                                tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1705                            }
1706
1707                            if (reslen < capacity) {
1708                                tmpAppend[reslen++] = SEP;
1709                            }
1710
1711                            len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1712                            if (reslen < capacity) {
1713                                uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1714                            }
1715                            reslen += len;
1716
1717                            if (reslen < capacity) {
1718                                tmpAppend[reslen++] = SEP;
1719                            }
1720
1721                            firstValue = FALSE;
1722                        }
1723
1724                        len = (int32_t)uprv_strlen(pPriv);
1725                        if (reslen < capacity) {
1726                            uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1727                        }
1728                        reslen += len;
1729                    }
1730                }
1731                /* reset private use starting position */
1732                pPriv = NULL;
1733            } else if (pPriv == NULL) {
1734                pPriv = p;
1735            }
1736            p++;
1737        }
1738
1739        if (U_FAILURE(*status)) {
1740            return 0;
1741        }
1742    }
1743
1744    if (U_SUCCESS(*status)) {
1745        len = reslen;
1746        if (reslen < capacity) {
1747            uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1748        }
1749    }
1750
1751    u_terminateChars(appendAt, capacity, reslen, status);
1752
1753    return reslen;
1754}
1755
1756/*
1757* -------------------------------------------------
1758*
1759* ultag_ functions
1760*
1761* -------------------------------------------------
1762*/
1763
/*
 * Bit flags used by the parser.
 * ultag_parse() keeps a mask of these in its "next" variable; a set bit means
 * that the corresponding kind of subtag is accepted at the current position.
 */
#define LANG 0x0001     /* language subtag */
#define EXTL 0x0002     /* extlang subtag */
#define SCRT 0x0004     /* script subtag */
#define REGN 0x0008     /* region subtag */
#define VART 0x0010     /* variant subtag */
#define EXTS 0x0020     /* extension singleton */
#define EXTV 0x0040     /* presumably an extension value subtag - TODO confirm in ultag_parse */
#define PRIV 0x0080     /* presumably the private use section - TODO confirm in ultag_parse */
1774static ULanguageTag*
1775ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1776    ULanguageTag *t;
1777    char *tagBuf;
1778    int16_t next;
1779    char *pSubtag, *pNext, *pLastGoodPosition;
1780    int32_t subtagLen;
1781    int32_t extlangIdx;
1782    ExtensionListEntry *pExtension;
1783    char *pExtValueSubtag, *pExtValueSubtagEnd;
1784    int32_t i;
1785    UBool privateuseVar = FALSE;
1786    int32_t grandfatheredLen = 0;
1787
1788    if (parsedLen != NULL) {
1789        *parsedLen = 0;
1790    }
1791
1792    if (U_FAILURE(*status)) {
1793        return NULL;
1794    }
1795
1796    if (tagLen < 0) {
1797        tagLen = (int32_t)uprv_strlen(tag);
1798    }
1799
1800    /* copy the entire string */
1801    tagBuf = (char*)uprv_malloc(tagLen + 1);
1802    if (tagBuf == NULL) {
1803        *status = U_MEMORY_ALLOCATION_ERROR;
1804        return NULL;
1805    }
1806    uprv_memcpy(tagBuf, tag, tagLen);
1807    *(tagBuf + tagLen) = 0;
1808
1809    /* create a ULanguageTag */
1810    t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1811    if (t == NULL) {
1812        uprv_free(tagBuf);
1813        *status = U_MEMORY_ALLOCATION_ERROR;
1814        return NULL;
1815    }
1816    _initializeULanguageTag(t);
1817    t->buf = tagBuf;
1818
1819    if (tagLen < MINLEN) {
1820        /* the input tag is too short - return empty ULanguageTag */
1821        return t;
1822    }
1823
1824    /* check if the tag is grandfathered */
1825    for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
1826        if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1827            int32_t newTagLength;
1828
1829            grandfatheredLen = tagLen;  /* back up for output parsedLen */
1830            newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
1831            if (tagLen < newTagLength) {
1832                uprv_free(tagBuf);
1833                tagBuf = (char*)uprv_malloc(newTagLength + 1);
1834                if (tagBuf == NULL) {
1835                    *status = U_MEMORY_ALLOCATION_ERROR;
1836                    return NULL;
1837                }
1838                t->buf = tagBuf;
1839                tagLen = newTagLength;
1840            }
1841            uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1842            break;
1843        }
1844    }
1845
1846    /*
1847     * langtag      =   language
1848     *                  ["-" script]
1849     *                  ["-" region]
1850     *                  *("-" variant)
1851     *                  *("-" extension)
1852     *                  ["-" privateuse]
1853     */
1854
1855    next = LANG | PRIV;
1856    pNext = pLastGoodPosition = tagBuf;
1857    extlangIdx = 0;
1858    pExtension = NULL;
1859    pExtValueSubtag = NULL;
1860    pExtValueSubtagEnd = NULL;
1861
1862    while (pNext) {
1863        char *pSep;
1864
1865        pSubtag = pNext;
1866
1867        /* locate next separator char */
1868        pSep = pSubtag;
1869        while (*pSep) {
1870            if (*pSep == SEP) {
1871                break;
1872            }
1873            pSep++;
1874        }
1875        if (*pSep == 0) {
1876            /* last subtag */
1877            pNext = NULL;
1878        } else {
1879            pNext = pSep + 1;
1880        }
1881        subtagLen = (int32_t)(pSep - pSubtag);
1882
1883        if (next & LANG) {
1884            if (_isLanguageSubtag(pSubtag, subtagLen)) {
1885                *pSep = 0;  /* terminate */
1886                t->language = T_CString_toLowerCase(pSubtag);
1887
1888                pLastGoodPosition = pSep;
1889                next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1890                continue;
1891            }
1892        }
1893        if (next & EXTL) {
1894            if (_isExtlangSubtag(pSubtag, subtagLen)) {
1895                *pSep = 0;
1896                t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1897
1898                pLastGoodPosition = pSep;
1899                if (extlangIdx < 3) {
1900                    next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1901                } else {
1902                    next = SCRT | REGN | VART | EXTS | PRIV;
1903                }
1904                continue;
1905            }
1906        }
1907        if (next & SCRT) {
1908            if (_isScriptSubtag(pSubtag, subtagLen)) {
1909                char *p = pSubtag;
1910
1911                *pSep = 0;
1912
1913                /* to title case */
1914                *p = uprv_toupper(*p);
1915                p++;
1916                for (; *p; p++) {
1917                    *p = uprv_tolower(*p);
1918                }
1919
1920                t->script = pSubtag;
1921
1922                pLastGoodPosition = pSep;
1923                next = REGN | VART | EXTS | PRIV;
1924                continue;
1925            }
1926        }
1927        if (next & REGN) {
1928            if (_isRegionSubtag(pSubtag, subtagLen)) {
1929                *pSep = 0;
1930                t->region = T_CString_toUpperCase(pSubtag);
1931
1932                pLastGoodPosition = pSep;
1933                next = VART | EXTS | PRIV;
1934                continue;
1935            }
1936        }
1937        if (next & VART) {
1938            if (_isVariantSubtag(pSubtag, subtagLen) ||
1939               (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
1940                VariantListEntry *var;
1941                UBool isAdded;
1942
1943                var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1944                if (var == NULL) {
1945                    *status = U_MEMORY_ALLOCATION_ERROR;
1946                    goto error;
1947                }
1948                *pSep = 0;
1949                var->variant = T_CString_toUpperCase(pSubtag);
1950                isAdded = _addVariantToList(&(t->variants), var);
1951                if (!isAdded) {
1952                    /* duplicated variant entry */
1953                    uprv_free(var);
1954                    break;
1955                }
1956                pLastGoodPosition = pSep;
1957                next = VART | EXTS | PRIV;
1958                continue;
1959            }
1960        }
1961        if (next & EXTS) {
1962            if (_isExtensionSingleton(pSubtag, subtagLen)) {
1963                if (pExtension != NULL) {
1964                    if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1965                        /* the previous extension is incomplete */
1966                        uprv_free(pExtension);
1967                        pExtension = NULL;
1968                        break;
1969                    }
1970
1971                    /* terminate the previous extension value */
1972                    *pExtValueSubtagEnd = 0;
1973                    pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1974
1975                    /* insert the extension to the list */
1976                    if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1977                        pLastGoodPosition = pExtValueSubtagEnd;
1978                    } else {
1979                        /* stop parsing here */
1980                        uprv_free(pExtension);
1981                        pExtension = NULL;
1982                        break;
1983                    }
1984                }
1985
1986                /* create a new extension */
1987                pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1988                if (pExtension == NULL) {
1989                    *status = U_MEMORY_ALLOCATION_ERROR;
1990                    goto error;
1991                }
1992                *pSep = 0;
1993                pExtension->key = T_CString_toLowerCase(pSubtag);
1994                pExtension->value = NULL;   /* will be set later */
1995
1996                /*
1997                 * reset the start and the end location of extension value
1998                 * subtags for this extension
1999                 */
2000                pExtValueSubtag = NULL;
2001                pExtValueSubtagEnd = NULL;
2002
2003                next = EXTV;
2004                continue;
2005            }
2006        }
2007        if (next & EXTV) {
2008            if (_isExtensionSubtag(pSubtag, subtagLen)) {
2009                if (pExtValueSubtag == NULL) {
2010                    /* if the start postion of this extension's value is not yet,
2011                        this one is the first value subtag */
2012                    pExtValueSubtag = pSubtag;
2013                }
2014
2015                /* Mark the end of this subtag */
2016                pExtValueSubtagEnd = pSep;
2017                next = EXTS | EXTV | PRIV;
2018
2019                continue;
2020            }
2021        }
2022        if (next & PRIV) {
2023            if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2024                char *pPrivuseVal;
2025
2026                if (pExtension != NULL) {
2027                    /* Process the last extension */
2028                    if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2029                        /* the previous extension is incomplete */
2030                        uprv_free(pExtension);
2031                        pExtension = NULL;
2032                        break;
2033                    } else {
2034                        /* terminate the previous extension value */
2035                        *pExtValueSubtagEnd = 0;
2036                        pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2037
2038                        /* insert the extension to the list */
2039                        if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2040                            pLastGoodPosition = pExtValueSubtagEnd;
2041                            pExtension = NULL;
2042                        } else {
2043                        /* stop parsing here */
2044                            uprv_free(pExtension);
2045                            pExtension = NULL;
2046                            break;
2047                        }
2048                    }
2049                }
2050
2051                /* The rest of part will be private use value subtags */
2052                if (pNext == NULL) {
2053                    /* empty private use subtag */
2054                    break;
2055                }
2056                /* back up the private use value start position */
2057                pPrivuseVal = pNext;
2058
2059                /* validate private use value subtags */
2060                while (pNext) {
2061                    pSubtag = pNext;
2062                    pSep = pSubtag;
2063                    while (*pSep) {
2064                        if (*pSep == SEP) {
2065                            break;
2066                        }
2067                        pSep++;
2068                    }
2069                    if (*pSep == 0) {
2070                        /* last subtag */
2071                        pNext = NULL;
2072                    } else {
2073                        pNext = pSep + 1;
2074                    }
2075                    subtagLen = (int32_t)(pSep - pSubtag);
2076
2077                    if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2078                        *pSep = 0;
2079                        next = VART;
2080                        privateuseVar = TRUE;
2081                        break;
2082                    } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2083                        pLastGoodPosition = pSep;
2084                    } else {
2085                        break;
2086                    }
2087                }
2088
2089                if (next == VART) {
2090                    continue;
2091                }
2092
2093                if (pLastGoodPosition - pPrivuseVal > 0) {
2094                    *pLastGoodPosition = 0;
2095                    t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2096                }
2097                /* No more subtags, exiting the parse loop */
2098                break;
2099            }
2100            break;
2101        }
2102
2103        /* If we fell through here, it means this subtag is illegal - quit parsing */
2104        break;
2105    }
2106
2107    if (pExtension != NULL) {
2108        /* Process the last extension */
2109        if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2110            /* the previous extension is incomplete */
2111            uprv_free(pExtension);
2112        } else {
2113            /* terminate the previous extension value */
2114            *pExtValueSubtagEnd = 0;
2115            pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2116            /* insert the extension to the list */
2117            if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2118                pLastGoodPosition = pExtValueSubtagEnd;
2119            } else {
2120                uprv_free(pExtension);
2121            }
2122        }
2123    }
2124
2125    if (parsedLen != NULL) {
2126        *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2127    }
2128
2129    return t;
2130
2131error:
2132    uprv_free(t);
2133    return NULL;
2134}
2135
2136static void
2137ultag_close(ULanguageTag* langtag) {
2138
2139    if (langtag == NULL) {
2140        return;
2141    }
2142
2143    uprv_free(langtag->buf);
2144
2145    if (langtag->variants) {
2146        VariantListEntry *curVar = langtag->variants;
2147        while (curVar) {
2148            VariantListEntry *nextVar = curVar->next;
2149            uprv_free(curVar);
2150            curVar = nextVar;
2151        }
2152    }
2153
2154    if (langtag->extensions) {
2155        ExtensionListEntry *curExt = langtag->extensions;
2156        while (curExt) {
2157            ExtensionListEntry *nextExt = curExt->next;
2158            uprv_free(curExt);
2159            curExt = nextExt;
2160        }
2161    }
2162
2163    uprv_free(langtag);
2164}
2165
2166static const char*
2167ultag_getLanguage(const ULanguageTag* langtag) {
2168    return langtag->language;
2169}
2170
#if 0
/*
 * Compiled out.  Looks the tag's language up in DEPRECATEDLANGS, which is
 * walked two entries at a time until a NULL entry; on a match the entry
 * following the matched one is returned, otherwise the tag's own language.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    const char *language = langtag->language;
    int32_t pairStart = 0;

    while (DEPRECATEDLANGS[pairStart] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairStart], language) == 0) {
            return DEPRECATEDLANGS[pairStart + 1];
        }
        pairStart += 2;
    }
    return language;
}
#endif
2183
2184static const char*
2185ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2186    if (idx >= 0 && idx < MAXEXTLANG) {
2187        return langtag->extlang[idx];
2188    }
2189    return NULL;
2190}
2191
2192static int32_t
2193ultag_getExtlangSize(const ULanguageTag* langtag) {
2194    int32_t size = 0;
2195    int32_t i;
2196    for (i = 0; i < MAXEXTLANG; i++) {
2197        if (langtag->extlang[i]) {
2198            size++;
2199        }
2200    }
2201    return size;
2202}
2203
2204static const char*
2205ultag_getScript(const ULanguageTag* langtag) {
2206    return langtag->script;
2207}
2208
2209static const char*
2210ultag_getRegion(const ULanguageTag* langtag) {
2211    return langtag->region;
2212}
2213
2214static const char*
2215ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2216    const char *var = NULL;
2217    VariantListEntry *cur = langtag->variants;
2218    int32_t i = 0;
2219    while (cur) {
2220        if (i == idx) {
2221            var = cur->variant;
2222            break;
2223        }
2224        cur = cur->next;
2225        i++;
2226    }
2227    return var;
2228}
2229
2230static int32_t
2231ultag_getVariantsSize(const ULanguageTag* langtag) {
2232    int32_t size = 0;
2233    VariantListEntry *cur = langtag->variants;
2234    while (TRUE) {
2235        if (cur == NULL) {
2236            break;
2237        }
2238        size++;
2239        cur = cur->next;
2240    }
2241    return size;
2242}
2243
2244static const char*
2245ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2246    const char *key = NULL;
2247    ExtensionListEntry *cur = langtag->extensions;
2248    int32_t i = 0;
2249    while (cur) {
2250        if (i == idx) {
2251            key = cur->key;
2252            break;
2253        }
2254        cur = cur->next;
2255        i++;
2256    }
2257    return key;
2258}
2259
2260static const char*
2261ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2262    const char *val = NULL;
2263    ExtensionListEntry *cur = langtag->extensions;
2264    int32_t i = 0;
2265    while (cur) {
2266        if (i == idx) {
2267            val = cur->value;
2268            break;
2269        }
2270        cur = cur->next;
2271        i++;
2272    }
2273    return val;
2274}
2275
2276static int32_t
2277ultag_getExtensionsSize(const ULanguageTag* langtag) {
2278    int32_t size = 0;
2279    ExtensionListEntry *cur = langtag->extensions;
2280    while (TRUE) {
2281        if (cur == NULL) {
2282            break;
2283        }
2284        size++;
2285        cur = cur->next;
2286    }
2287    return size;
2288}
2289
2290static const char*
2291ultag_getPrivateUse(const ULanguageTag* langtag) {
2292    return langtag->privateuse;
2293}
2294
#if 0
/* Compiled out.  Returns the grandfathered field stored in the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2301
2302
2303/*
2304* -------------------------------------------------
2305*
2306* Locale/BCP47 conversion APIs, exposed as uloc_*
2307*
2308* -------------------------------------------------
2309*/
2310U_CAPI int32_t U_EXPORT2
2311uloc_toLanguageTag(const char* localeID,
2312                   char* langtag,
2313                   int32_t langtagCapacity,
2314                   UBool strict,
2315                   UErrorCode* status) {
2316    /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2317    char canonical[256];
2318    int32_t reslen = 0;
2319    UErrorCode tmpStatus = U_ZERO_ERROR;
2320    UBool hadPosix = FALSE;
2321    const char* pKeywordStart;
2322
2323    /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
2324    canonical[0] = 0;
2325    if (uprv_strlen(localeID) > 0) {
2326        uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2327        if (tmpStatus != U_ZERO_ERROR) {
2328            *status = U_ILLEGAL_ARGUMENT_ERROR;
2329            return 0;
2330        }
2331    }
2332
2333    /* For handling special case - private use only tag */
2334    pKeywordStart = locale_getKeywordsStart(canonical);
2335    if (pKeywordStart == canonical) {
2336        UEnumeration *kwdEnum;
2337        int kwdCnt = 0;
2338        UBool done = FALSE;
2339
2340        kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2341        if (kwdEnum != NULL) {
2342            kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2343            if (kwdCnt == 1) {
2344                const char *key;
2345                int32_t len = 0;
2346
2347                key = uenum_next(kwdEnum, &len, &tmpStatus);
2348                if (len == 1 && *key == PRIVATEUSE) {
2349                    char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2350                    buf[0] = PRIVATEUSE;
2351                    buf[1] = SEP;
2352                    len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2353                    if (U_SUCCESS(tmpStatus)) {
2354                        if (_isPrivateuseValueSubtags(&buf[2], len)) {
2355                            /* return private use only tag */
2356                            reslen = len + 2;
2357                            uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2358                            u_terminateChars(langtag, langtagCapacity, reslen, status);
2359                            done = TRUE;
2360                        } else if (strict) {
2361                            *status = U_ILLEGAL_ARGUMENT_ERROR;
2362                            done = TRUE;
2363                        }
2364                        /* if not strict mode, then "und" will be returned */
2365                    } else {
2366                        *status = U_ILLEGAL_ARGUMENT_ERROR;
2367                        done = TRUE;
2368                    }
2369                }
2370            }
2371            uenum_close(kwdEnum);
2372            if (done) {
2373                return reslen;
2374            }
2375        }
2376    }
2377
2378    reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2379    reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2380    reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2381    reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2382    reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2383    reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2384
2385    return reslen;
2386}
2387
2388
2389U_CAPI int32_t U_EXPORT2
2390uloc_forLanguageTag(const char* langtag,
2391                    char* localeID,
2392                    int32_t localeIDCapacity,
2393                    int32_t* parsedLength,
2394                    UErrorCode* status) {
2395    ULanguageTag *lt;
2396    int32_t reslen = 0;
2397    const char *subtag, *p;
2398    int32_t len;
2399    int32_t i, n;
2400    UBool noRegion = TRUE;
2401
2402    lt = ultag_parse(langtag, -1, parsedLength, status);
2403    if (U_FAILURE(*status)) {
2404        return 0;
2405    }
2406
2407    /* language */
2408    subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2409    if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2410        len = (int32_t)uprv_strlen(subtag);
2411        if (len > 0) {
2412            if (reslen < localeIDCapacity) {
2413                uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2414            }
2415            reslen += len;
2416        }
2417    }
2418
2419    /* script */
2420    subtag = ultag_getScript(lt);
2421    len = (int32_t)uprv_strlen(subtag);
2422    if (len > 0) {
2423        if (reslen < localeIDCapacity) {
2424            *(localeID + reslen) = LOCALE_SEP;
2425        }
2426        reslen++;
2427
2428        /* write out the script in title case */
2429        p = subtag;
2430        while (*p) {
2431            if (reslen < localeIDCapacity) {
2432                if (p == subtag) {
2433                    *(localeID + reslen) = uprv_toupper(*p);
2434                } else {
2435                    *(localeID + reslen) = *p;
2436                }
2437            }
2438            reslen++;
2439            p++;
2440        }
2441    }
2442
2443    /* region */
2444    subtag = ultag_getRegion(lt);
2445    len = (int32_t)uprv_strlen(subtag);
2446    if (len > 0) {
2447        if (reslen < localeIDCapacity) {
2448            *(localeID + reslen) = LOCALE_SEP;
2449        }
2450        reslen++;
2451        /* write out the retion in upper case */
2452        p = subtag;
2453        while (*p) {
2454            if (reslen < localeIDCapacity) {
2455                *(localeID + reslen) = uprv_toupper(*p);
2456            }
2457            reslen++;
2458            p++;
2459        }
2460        noRegion = FALSE;
2461    }
2462
2463    /* variants */
2464    n = ultag_getVariantsSize(lt);
2465    if (n > 0) {
2466        if (noRegion) {
2467            if (reslen < localeIDCapacity) {
2468                *(localeID + reslen) = LOCALE_SEP;
2469            }
2470            reslen++;
2471        }
2472
2473        for (i = 0; i < n; i++) {
2474            subtag = ultag_getVariant(lt, i);
2475            if (reslen < localeIDCapacity) {
2476                *(localeID + reslen) = LOCALE_SEP;
2477            }
2478            reslen++;
2479            /* write out the variant in upper case */
2480            p = subtag;
2481            while (*p) {
2482                if (reslen < localeIDCapacity) {
2483                    *(localeID + reslen) = uprv_toupper(*p);
2484                }
2485                reslen++;
2486                p++;
2487            }
2488        }
2489    }
2490
2491    /* keywords */
2492    n = ultag_getExtensionsSize(lt);
2493    subtag = ultag_getPrivateUse(lt);
2494    if (n > 0 || uprv_strlen(subtag) > 0) {
2495        if (reslen == 0 && n > 0) {
2496            /* need a language */
2497            if (reslen < localeIDCapacity) {
2498                uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2499            }
2500            reslen += LANG_UND_LEN;
2501        }
2502        len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2503        reslen += len;
2504    }
2505
2506    ultag_close(lt);
2507    return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2508}
2509
2510
2511