1/*
2**********************************************************************
3*   Copyright (C) 2009-2012, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*/
7
8#include "unicode/utypes.h"
9#include "unicode/ures.h"
10#include "unicode/putil.h"
11#include "unicode/uloc.h"
12#include "ustr_imp.h"
13#include "cmemory.h"
14#include "cstring.h"
15#include "putilimp.h"
16#include "uinvchar.h"
17#include "ulocimp.h"
18#include "uassert.h"
19
20#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
21
/*
 * One node of a singly linked list of variants.
 * The last node of a list has next == NULL.
 */
typedef struct VariantListEntry {
    const char *variant;            /* variant string of this node */
    struct VariantListEntry *next;  /* following node, or NULL */
} VariantListEntry;
27
/*
 * One node of a singly linked list of attribute values.
 * The last node of a list has next == NULL.
 */
typedef struct AttributeListEntry {
    const char *attribute;            /* attribute string of this node */
    struct AttributeListEntry *next;  /* following node, or NULL */
} AttributeListEntry;
33
/*
 * One node of a singly linked list of extensions, each a key/value pair.
 * The last node of a list has next == NULL.
 */
typedef struct ExtensionListEntry {
    const char *key;                  /* extension key */
    const char *value;                /* value belonging to key */
    struct ExtensionListEntry *next;  /* following node, or NULL */
} ExtensionListEntry;
40
41#define MAXEXTLANG 3
42typedef struct ULanguageTag {
43    char                *buf;   /* holding parsed subtags */
44    const char          *language;
45    const char          *extlang[MAXEXTLANG];
46    const char          *script;
47    const char          *region;
48    VariantListEntry    *variants;
49    ExtensionListEntry  *extensions;
50    const char          *privateuse;
51    const char          *grandfathered;
52} ULanguageTag;
53
/* Language tag syntax constants */
#define MINLEN      2
#define SEP         '-'     /* delimiter between subtags */
#define PRIVATEUSE  'x'     /* private use singleton */
#define LDMLEXT     'u'     /* singleton that LDML keys are ordered as */

/*
 * Locale ID punctuation.
 * NOTE(review): not referenced in the visible part of this file; the
 * names suggest the lang_REGION@key=type;key=type layout - confirm.
 */
#define LOCALE_SEP          '_'
#define LOCALE_EXT_SEP      '@'
#define LOCALE_KEYWORD_SEP  ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character class tests; ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c)   uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c) >= '0' && (c) <= '9')
66
/* Fixed strings shared by the functions in this file */
static const char EMPTY[]                  = "";
static const char LANG_UND[]               = "und";   /* written when no usable language code is available */
static const char PRIVATEUSE_KEY[]         = "x";
static const char _POSIX[]                 = "_POSIX";
static const char POSIX_KEY[]              = "va";
static const char POSIX_VALUE[]            = "posix";
static const char LOCALE_ATTRIBUTE_KEY[]   = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[]        = "yes";

/* Length of LANG_UND without its terminating NUL */
#define LANG_UND_LEN 3
78
/*
 * Flat table of string pairs: entry 2*i is a grandfathered tag and
 * entry 2*i + 1 is its preferred replacement. A NULL pair ends the table.
 */
static const char* const GRANDFATHERED[] = {
    /* grandfathered, preferred */
    "art-lojban", "jbo",
    "cel-gaulish", "xtg-x-cel-gaulish",
    "en-GB-oed", "en-GB-x-oed",
    "i-ami", "ami",
    "i-bnn", "bnn",
    "i-default", "en-x-i-default",
    "i-enochian", "und-x-i-enochian",
    "i-hak", "hak",
    "i-klingon", "tlh",
    "i-lux", "lb",
    "i-mingo", "see-x-i-mingo",
    "i-navajo", "nv",
    "i-pwn", "pwn",
    "i-tao", "tao",
    "i-tay", "tay",
    "i-tsu", "tsu",
    "no-bok", "nb",
    "no-nyn", "nn",
    "sgn-be-fr", "sfb",
    "sgn-be-nl", "vgt",
    "sgn-ch-de", "sgg",
    "zh-guoyu", "cmn",
    "zh-hakka", "hak",
    "zh-min", "nan-x-zh-min",
    "zh-min-nan", "nan",
    "zh-xiang", "hsn",
    NULL, NULL
};
109
/*
 * Flat table of language code pairs: row 2*i is a deprecated code and
 * row 2*i + 1 is the code that replaces it.
 */
static const char DEPRECATEDLANGS[][4] = {
    /* deprecated, new */
    "iw", "he",
    "ji", "yi",
    "in", "id"
};
116
117/*
118* -------------------------------------------------
119*
120* These ultag_ functions may be exposed as APIs later
121*
122* -------------------------------------------------
123*/
124
125static ULanguageTag*
126ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
127
128static void
129ultag_close(ULanguageTag* langtag);
130
131static const char*
132ultag_getLanguage(const ULanguageTag* langtag);
133
134#if 0
135static const char*
136ultag_getJDKLanguage(const ULanguageTag* langtag);
137#endif
138
139static const char*
140ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
141
142static int32_t
143ultag_getExtlangSize(const ULanguageTag* langtag);
144
145static const char*
146ultag_getScript(const ULanguageTag* langtag);
147
148static const char*
149ultag_getRegion(const ULanguageTag* langtag);
150
151static const char*
152ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
153
154static int32_t
155ultag_getVariantsSize(const ULanguageTag* langtag);
156
157static const char*
158ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
159
160static const char*
161ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
162
163static int32_t
164ultag_getExtensionsSize(const ULanguageTag* langtag);
165
166static const char*
167ultag_getPrivateUse(const ULanguageTag* langtag);
168
169#if 0
170static const char*
171ultag_getGrandfathered(const ULanguageTag* langtag);
172#endif
173
174/*
175* -------------------------------------------------
176*
177* Language subtag syntax validation functions
178*
179* -------------------------------------------------
180*/
181
182static UBool
183_isAlphaString(const char* s, int32_t len) {
184    int32_t i;
185    for (i = 0; i < len; i++) {
186        if (!ISALPHA(*(s + i))) {
187            return FALSE;
188        }
189    }
190    return TRUE;
191}
192
193static UBool
194_isNumericString(const char* s, int32_t len) {
195    int32_t i;
196    for (i = 0; i < len; i++) {
197        if (!ISNUMERIC(*(s + i))) {
198            return FALSE;
199        }
200    }
201    return TRUE;
202}
203
204static UBool
205_isAlphaNumericString(const char* s, int32_t len) {
206    int32_t i;
207    for (i = 0; i < len; i++) {
208        if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
209            return FALSE;
210        }
211    }
212    return TRUE;
213}
214
215static UBool
216_isLanguageSubtag(const char* s, int32_t len) {
217    /*
218     * language      = 2*3ALPHA            ; shortest ISO 639 code
219     *                 ["-" extlang]       ; sometimes followed by
220     *                                     ;   extended language subtags
221     *               / 4ALPHA              ; or reserved for future use
222     *               / 5*8ALPHA            ; or registered language subtag
223     */
224    if (len < 0) {
225        len = (int32_t)uprv_strlen(s);
226    }
227    if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
228        return TRUE;
229    }
230    return FALSE;
231}
232
233static UBool
234_isExtlangSubtag(const char* s, int32_t len) {
235    /*
236     * extlang       = 3ALPHA              ; selected ISO 639 codes
237     *                 *2("-" 3ALPHA)      ; permanently reserved
238     */
239    if (len < 0) {
240        len = (int32_t)uprv_strlen(s);
241    }
242    if (len == 3 && _isAlphaString(s, len)) {
243        return TRUE;
244    }
245    return FALSE;
246}
247
248static UBool
249_isScriptSubtag(const char* s, int32_t len) {
250    /*
251     * script        = 4ALPHA              ; ISO 15924 code
252     */
253    if (len < 0) {
254        len = (int32_t)uprv_strlen(s);
255    }
256    if (len == 4 && _isAlphaString(s, len)) {
257        return TRUE;
258    }
259    return FALSE;
260}
261
262static UBool
263_isRegionSubtag(const char* s, int32_t len) {
264    /*
265     * region        = 2ALPHA              ; ISO 3166-1 code
266     *               / 3DIGIT              ; UN M.49 code
267     */
268    if (len < 0) {
269        len = (int32_t)uprv_strlen(s);
270    }
271    if (len == 2 && _isAlphaString(s, len)) {
272        return TRUE;
273    }
274    if (len == 3 && _isNumericString(s, len)) {
275        return TRUE;
276    }
277    return FALSE;
278}
279
280static UBool
281_isVariantSubtag(const char* s, int32_t len) {
282    /*
283     * variant       = 5*8alphanum         ; registered variants
284     *               / (DIGIT 3alphanum)
285     */
286    if (len < 0) {
287        len = (int32_t)uprv_strlen(s);
288    }
289    if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
290        return TRUE;
291    }
292    if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
293        return TRUE;
294    }
295    return FALSE;
296}
297
298static UBool
299_isPrivateuseVariantSubtag(const char* s, int32_t len) {
300    /*
301     * variant       = 1*8alphanum         ; registered variants
302     *               / (DIGIT 3alphanum)
303     */
304    if (len < 0) {
305        len = (int32_t)uprv_strlen(s);
306    }
307    if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
308        return TRUE;
309    }
310    return FALSE;
311}
312
313static UBool
314_isExtensionSingleton(const char* s, int32_t len) {
315    /*
316     * extension     = singleton 1*("-" (2*8alphanum))
317     */
318    if (len < 0) {
319        len = (int32_t)uprv_strlen(s);
320    }
321    if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
322        return TRUE;
323    }
324    return FALSE;
325}
326
327static UBool
328_isExtensionSubtag(const char* s, int32_t len) {
329    /*
330     * extension     = singleton 1*("-" (2*8alphanum))
331     */
332    if (len < 0) {
333        len = (int32_t)uprv_strlen(s);
334    }
335    if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
336        return TRUE;
337    }
338    return FALSE;
339}
340
341static UBool
342_isExtensionSubtags(const char* s, int32_t len) {
343    const char *p = s;
344    const char *pSubtag = NULL;
345
346    if (len < 0) {
347        len = (int32_t)uprv_strlen(s);
348    }
349
350    while ((p - s) < len) {
351        if (*p == SEP) {
352            if (pSubtag == NULL) {
353                return FALSE;
354            }
355            if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
356                return FALSE;
357            }
358            pSubtag = NULL;
359        } else if (pSubtag == NULL) {
360            pSubtag = p;
361        }
362        p++;
363    }
364    if (pSubtag == NULL) {
365        return FALSE;
366    }
367    return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
368}
369
370static UBool
371_isPrivateuseValueSubtag(const char* s, int32_t len) {
372    /*
373     * privateuse    = "x" 1*("-" (1*8alphanum))
374     */
375    if (len < 0) {
376        len = (int32_t)uprv_strlen(s);
377    }
378    if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
379        return TRUE;
380    }
381    return FALSE;
382}
383
384static UBool
385_isPrivateuseValueSubtags(const char* s, int32_t len) {
386    const char *p = s;
387    const char *pSubtag = NULL;
388
389    if (len < 0) {
390        len = (int32_t)uprv_strlen(s);
391    }
392
393    while ((p - s) < len) {
394        if (*p == SEP) {
395            if (pSubtag == NULL) {
396                return FALSE;
397            }
398            if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
399                return FALSE;
400            }
401            pSubtag = NULL;
402        } else if (pSubtag == NULL) {
403            pSubtag = p;
404        }
405        p++;
406    }
407    if (pSubtag == NULL) {
408        return FALSE;
409    }
410    return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
411}
412
413static UBool
414_isLDMLKey(const char* s, int32_t len) {
415    if (len < 0) {
416        len = (int32_t)uprv_strlen(s);
417    }
418    if (len == 2 && _isAlphaNumericString(s, len)) {
419        return TRUE;
420    }
421    return FALSE;
422}
423
424static UBool
425_isLDMLType(const char* s, int32_t len) {
426    if (len < 0) {
427        len = (int32_t)uprv_strlen(s);
428    }
429    if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
430        return TRUE;
431    }
432    return FALSE;
433}
434
435/*
436* -------------------------------------------------
437*
438* Helper functions
439*
440* -------------------------------------------------
441*/
442
443static UBool
444_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
445    UBool bAdded = TRUE;
446
447    if (*first == NULL) {
448        var->next = NULL;
449        *first = var;
450    } else {
451        VariantListEntry *prev, *cur;
452        int32_t cmp;
453
454        /* variants order should be preserved */
455        prev = NULL;
456        cur = *first;
457        while (TRUE) {
458            if (cur == NULL) {
459                prev->next = var;
460                var->next = NULL;
461                break;
462            }
463
464            /* Checking for duplicate variant */
465            cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
466            if (cmp == 0) {
467                /* duplicated variant */
468                bAdded = FALSE;
469                break;
470            }
471            prev = cur;
472            cur = cur->next;
473        }
474    }
475
476    return bAdded;
477}
478
479static UBool
480_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
481    UBool bAdded = TRUE;
482
483    if (*first == NULL) {
484        attr->next = NULL;
485        *first = attr;
486    } else {
487        AttributeListEntry *prev, *cur;
488        int32_t cmp;
489
490        /* reorder variants in alphabetical order */
491        prev = NULL;
492        cur = *first;
493        while (TRUE) {
494            if (cur == NULL) {
495                prev->next = attr;
496                attr->next = NULL;
497                break;
498            }
499            cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
500            if (cmp < 0) {
501                if (prev == NULL) {
502                    *first = attr;
503                } else {
504                    prev->next = attr;
505                }
506                attr->next = cur;
507                break;
508            }
509            if (cmp == 0) {
510                /* duplicated variant */
511                bAdded = FALSE;
512                break;
513            }
514            prev = cur;
515            cur = cur->next;
516        }
517    }
518
519    return bAdded;
520}
521
522
523static UBool
524_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
525    UBool bAdded = TRUE;
526
527    if (*first == NULL) {
528        ext->next = NULL;
529        *first = ext;
530    } else {
531        ExtensionListEntry *prev, *cur;
532        int32_t cmp;
533
534        /* reorder variants in alphabetical order */
535        prev = NULL;
536        cur = *first;
537        while (TRUE) {
538            if (cur == NULL) {
539                prev->next = ext;
540                ext->next = NULL;
541                break;
542            }
543            if (localeToBCP) {
544                /* special handling for locale to bcp conversion */
545                int32_t len, curlen;
546
547                len = (int32_t)uprv_strlen(ext->key);
548                curlen = (int32_t)uprv_strlen(cur->key);
549
550                if (len == 1 && curlen == 1) {
551                    if (*(ext->key) == *(cur->key)) {
552                        cmp = 0;
553                    } else if (*(ext->key) == PRIVATEUSE) {
554                        cmp = 1;
555                    } else if (*(cur->key) == PRIVATEUSE) {
556                        cmp = -1;
557                    } else {
558                        cmp = *(ext->key) - *(cur->key);
559                    }
560                } else if (len == 1) {
561                    cmp = *(ext->key) - LDMLEXT;
562                } else if (curlen == 1) {
563                    cmp = LDMLEXT - *(cur->key);
564                } else {
565                    cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
566                }
567            } else {
568                cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
569            }
570            if (cmp < 0) {
571                if (prev == NULL) {
572                    *first = ext;
573                } else {
574                    prev->next = ext;
575                }
576                ext->next = cur;
577                break;
578            }
579            if (cmp == 0) {
580                /* duplicated extension key */
581                bAdded = FALSE;
582                break;
583            }
584            prev = cur;
585            cur = cur->next;
586        }
587    }
588
589    return bAdded;
590}
591
592static void
593_initializeULanguageTag(ULanguageTag* langtag) {
594    int32_t i;
595
596    langtag->buf = NULL;
597
598    langtag->language = EMPTY;
599    for (i = 0; i < MAXEXTLANG; i++) {
600        langtag->extlang[i] = NULL;
601    }
602
603    langtag->script = EMPTY;
604    langtag->region = EMPTY;
605
606    langtag->variants = NULL;
607    langtag->extensions = NULL;
608
609    langtag->grandfathered = EMPTY;
610    langtag->privateuse = EMPTY;
611}
612
/* Resource bundle name and the table names looked up inside it */
#define KEYTYPEDATA "keyTypeData"
#define KEYMAP      "keyMap"
#define TYPEMAP     "typeMap"
#define TYPEALIAS   "typeAlias"

/* Sizes of the stack buffers used by the key/type conversion functions */
#define MAX_BCP47_SUBTAG_LEN 9   /* including null terminator */
#define MAX_LDML_KEY_LEN     22  /* no known valid LDML key exceeds 21 characters */
#define MAX_LDML_TYPE_LEN    32
620
621static int32_t
622_ldmlKeyToBCP47(const char* key, int32_t keyLen,
623                char* bcpKey, int32_t bcpKeyCapacity,
624                UErrorCode *status) {
625    UResourceBundle *rb;
626    char keyBuf[MAX_LDML_KEY_LEN];
627    char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
628    int32_t resultLen = 0;
629    int32_t i;
630    UErrorCode tmpStatus = U_ZERO_ERROR;
631    const UChar *uBcpKey;
632    int32_t bcpKeyLen;
633
634    if (keyLen < 0) {
635        keyLen = (int32_t)uprv_strlen(key);
636    }
637
638    if (keyLen >= sizeof(keyBuf)) {
639        /* no known valid LDML key exceeding 21 */
640        *status = U_ILLEGAL_ARGUMENT_ERROR;
641        return 0;
642    }
643
644    uprv_memcpy(keyBuf, key, keyLen);
645    keyBuf[keyLen] = 0;
646
647    /* to lower case */
648    for (i = 0; i < keyLen; i++) {
649        keyBuf[i] = uprv_tolower(keyBuf[i]);
650    }
651
652    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
653    ures_getByKey(rb, KEYMAP, rb, status);
654
655    if (U_FAILURE(*status)) {
656        ures_close(rb);
657        return 0;
658    }
659
660    uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
661    if (U_SUCCESS(tmpStatus)) {
662        u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
663        bcpKeyBuf[bcpKeyLen] = 0;
664        resultLen = bcpKeyLen;
665    } else {
666        if (_isLDMLKey(key, keyLen)) {
667            uprv_memcpy(bcpKeyBuf, key, keyLen);
668            bcpKeyBuf[keyLen] = 0;
669            resultLen = keyLen;
670        } else {
671            /* mapping not availabe */
672            *status = U_ILLEGAL_ARGUMENT_ERROR;
673        }
674    }
675    ures_close(rb);
676
677    if (U_FAILURE(*status)) {
678        return 0;
679    }
680
681    uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
682    return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
683}
684
685static int32_t
686_bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
687                char* key, int32_t keyCapacity,
688                UErrorCode *status) {
689    UResourceBundle *rb;
690    char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
691    int32_t resultLen = 0;
692    int32_t i;
693    const char *resKey = NULL;
694    UResourceBundle *mapData;
695
696    if (bcpKeyLen < 0) {
697        bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
698    }
699
700    if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
701        *status = U_ILLEGAL_ARGUMENT_ERROR;
702        return 0;
703    }
704
705    uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
706    bcpKeyBuf[bcpKeyLen] = 0;
707
708    /* to lower case */
709    for (i = 0; i < bcpKeyLen; i++) {
710        bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
711    }
712
713    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
714    ures_getByKey(rb, KEYMAP, rb, status);
715    if (U_FAILURE(*status)) {
716        ures_close(rb);
717        return 0;
718    }
719
720    mapData = ures_getNextResource(rb, NULL, status);
721    while (U_SUCCESS(*status)) {
722        const UChar *uBcpKey;
723        char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
724        int32_t tmpBcpKeyLen;
725
726        uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
727        if (U_FAILURE(*status)) {
728            break;
729        }
730        u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
731        tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
732        if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
733            /* found a matching BCP47 key */
734            resKey = ures_getKey(mapData);
735            resultLen = (int32_t)uprv_strlen(resKey);
736            break;
737        }
738        if (!ures_hasNext(rb)) {
739            break;
740        }
741        ures_getNextResource(rb, mapData, status);
742    }
743    ures_close(mapData);
744    ures_close(rb);
745
746    if (U_FAILURE(*status)) {
747        return 0;
748    }
749
750    if (resKey == NULL) {
751        resKey = bcpKeyBuf;
752        resultLen = bcpKeyLen;
753    }
754
755    uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
756    return u_terminateChars(key, keyCapacity, resultLen, status);
757}
758
759static int32_t
760_ldmlTypeToBCP47(const char* key, int32_t keyLen,
761                 const char* type, int32_t typeLen,
762                 char* bcpType, int32_t bcpTypeCapacity,
763                 UErrorCode *status) {
764    UResourceBundle *rb, *keyTypeData, *typeMapForKey;
765    char keyBuf[MAX_LDML_KEY_LEN];
766    char typeBuf[MAX_LDML_TYPE_LEN];
767    char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
768    int32_t resultLen = 0;
769    int32_t i;
770    UErrorCode tmpStatus = U_ZERO_ERROR;
771    const UChar *uBcpType, *uCanonicalType;
772    int32_t bcpTypeLen, canonicalTypeLen;
773    UBool isTimezone = FALSE;
774
775    if (keyLen < 0) {
776        keyLen = (int32_t)uprv_strlen(key);
777    }
778    if (keyLen >= sizeof(keyBuf)) {
779        /* no known valid LDML key exceeding 21 */
780        *status = U_ILLEGAL_ARGUMENT_ERROR;
781        return 0;
782    }
783    uprv_memcpy(keyBuf, key, keyLen);
784    keyBuf[keyLen] = 0;
785
786    /* to lower case */
787    for (i = 0; i < keyLen; i++) {
788        keyBuf[i] = uprv_tolower(keyBuf[i]);
789    }
790    if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
791        isTimezone = TRUE;
792    }
793
794    if (typeLen < 0) {
795        typeLen = (int32_t)uprv_strlen(type);
796    }
797    if (typeLen >= sizeof(typeBuf)) {
798        *status = U_ILLEGAL_ARGUMENT_ERROR;
799        return 0;
800    }
801
802    if (isTimezone) {
803        /* replace '/' with ':' */
804        for (i = 0; i < typeLen; i++) {
805            if (*(type + i) == '/') {
806                typeBuf[i] = ':';
807            } else {
808                typeBuf[i] = *(type + i);
809            }
810        }
811        typeBuf[typeLen] = 0;
812        type = &typeBuf[0];
813    }
814
815    keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
816    rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
817    if (U_FAILURE(*status)) {
818        ures_close(rb);
819        ures_close(keyTypeData);
820        return 0;
821    }
822
823    typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
824    uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
825    if (U_SUCCESS(tmpStatus)) {
826        u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
827        resultLen = bcpTypeLen;
828    } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
829        /* is this type alias? */
830        tmpStatus = U_ZERO_ERROR;
831        ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
832        ures_getByKey(rb, keyBuf, rb, &tmpStatus);
833        uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
834        if (U_SUCCESS(tmpStatus)) {
835            u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
836            if (isTimezone) {
837                /* replace '/' with ':' */
838                for (i = 0; i < canonicalTypeLen; i++) {
839                    if (typeBuf[i] == '/') {
840                        typeBuf[i] = ':';
841                    }
842                }
843            }
844            typeBuf[canonicalTypeLen] = 0;
845
846            /* look up the canonical type */
847            uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
848            if (U_SUCCESS(tmpStatus)) {
849                u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
850                resultLen = bcpTypeLen;
851            }
852        }
853        if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
854            if (_isLDMLType(type, typeLen)) {
855                uprv_memcpy(bcpTypeBuf, type, typeLen);
856                resultLen = typeLen;
857            } else {
858                /* mapping not availabe */
859                *status = U_ILLEGAL_ARGUMENT_ERROR;
860            }
861        }
862    } else {
863        *status = tmpStatus;
864    }
865    ures_close(rb);
866    ures_close(typeMapForKey);
867    ures_close(keyTypeData);
868
869    if (U_FAILURE(*status)) {
870        return 0;
871    }
872
873    uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
874    return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
875}
876
877static int32_t
878_bcp47ToLDMLType(const char* key, int32_t keyLen,
879                 const char* bcpType, int32_t bcpTypeLen,
880                 char* type, int32_t typeCapacity,
881                 UErrorCode *status) {
882    UResourceBundle *rb;
883    char keyBuf[MAX_LDML_KEY_LEN];
884    char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
885    int32_t resultLen = 0;
886    int32_t i, typeSize;
887    const char *resType = NULL;
888    UResourceBundle *mapData;
889    UErrorCode tmpStatus = U_ZERO_ERROR;
890    int32_t copyLen;
891
892    if (keyLen < 0) {
893        keyLen = (int32_t)uprv_strlen(key);
894    }
895
896    if (keyLen >= sizeof(keyBuf)) {
897        /* no known valid LDML key exceeding 21 */
898        *status = U_ILLEGAL_ARGUMENT_ERROR;
899        return 0;
900    }
901    uprv_memcpy(keyBuf, key, keyLen);
902    keyBuf[keyLen] = 0;
903
904    /* to lower case */
905    for (i = 0; i < keyLen; i++) {
906        keyBuf[i] = uprv_tolower(keyBuf[i]);
907    }
908
909
910    if (bcpTypeLen < 0) {
911        bcpTypeLen = (int32_t)uprv_strlen(bcpType);
912    }
913
914    typeSize = 0;
915    for (i = 0; i < bcpTypeLen; i++) {
916        if (bcpType[i] == SEP) {
917            if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
918                *status = U_ILLEGAL_ARGUMENT_ERROR;
919                return 0;
920            }
921            typeSize = 0;
922        } else {
923            typeSize++;
924        }
925    }
926
927    uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
928    bcpTypeBuf[bcpTypeLen] = 0;
929
930    /* to lower case */
931    for (i = 0; i < bcpTypeLen; i++) {
932        bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
933    }
934
935    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
936    ures_getByKey(rb, TYPEMAP, rb, status);
937    if (U_FAILURE(*status)) {
938        ures_close(rb);
939        return 0;
940    }
941
942    ures_getByKey(rb, keyBuf, rb, &tmpStatus);
943    mapData = ures_getNextResource(rb, NULL, &tmpStatus);
944    while (U_SUCCESS(tmpStatus)) {
945        const UChar *uBcpType;
946        char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
947        int32_t tmpBcpTypeLen;
948
949        uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
950        if (U_FAILURE(tmpStatus)) {
951            break;
952        }
953        u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
954        tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
955        if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
956            /* found a matching BCP47 type */
957            resType = ures_getKey(mapData);
958            resultLen = (int32_t)uprv_strlen(resType);
959            break;
960        }
961        if (!ures_hasNext(rb)) {
962            break;
963        }
964        ures_getNextResource(rb, mapData, &tmpStatus);
965    }
966    ures_close(mapData);
967    ures_close(rb);
968
969    if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
970        *status = tmpStatus;
971        return 0;
972    }
973
974    if (resType == NULL) {
975        resType = bcpTypeBuf;
976        resultLen = bcpTypeLen;
977    }
978
979    copyLen = uprv_min(resultLen, typeCapacity);
980    uprv_memcpy(type, resType, copyLen);
981
982    if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
983        for (i = 0; i < copyLen; i++) {
984            if (*(type + i) == ':') {
985                *(type + i) = '/';
986            }
987        }
988    }
989
990    return u_terminateChars(type, typeCapacity, resultLen, status);
991}
992
993static int32_t
994_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
995    char buf[ULOC_LANG_CAPACITY];
996    UErrorCode tmpStatus = U_ZERO_ERROR;
997    int32_t len, i;
998    int32_t reslen = 0;
999
1000    if (U_FAILURE(*status)) {
1001        return 0;
1002    }
1003
1004    len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
1005    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1006        if (strict) {
1007            *status = U_ILLEGAL_ARGUMENT_ERROR;
1008            return 0;
1009        }
1010        len = 0;
1011    }
1012
1013    /* Note: returned language code is in lower case letters */
1014
1015    if (len == 0) {
1016        if (reslen < capacity) {
1017            uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1018        }
1019        reslen += LANG_UND_LEN;
1020    } else if (!_isLanguageSubtag(buf, len)) {
1021            /* invalid language code */
1022        if (strict) {
1023            *status = U_ILLEGAL_ARGUMENT_ERROR;
1024            return 0;
1025        }
1026        if (reslen < capacity) {
1027            uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1028        }
1029        reslen += LANG_UND_LEN;
1030    } else {
1031        /* resolve deprecated */
1032        for (i = 0; i < LENGTHOF(DEPRECATEDLANGS); i += 2) {
1033            if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
1034                uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
1035                len = (int32_t)uprv_strlen(buf);
1036                break;
1037            }
1038        }
1039        if (reslen < capacity) {
1040            uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1041        }
1042        reslen += len;
1043    }
1044    u_terminateChars(appendAt, capacity, reslen, status);
1045    return reslen;
1046}
1047
1048static int32_t
1049_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1050    char buf[ULOC_SCRIPT_CAPACITY];
1051    UErrorCode tmpStatus = U_ZERO_ERROR;
1052    int32_t len;
1053    int32_t reslen = 0;
1054
1055    if (U_FAILURE(*status)) {
1056        return 0;
1057    }
1058
1059    len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
1060    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1061        if (strict) {
1062            *status = U_ILLEGAL_ARGUMENT_ERROR;
1063        }
1064        return 0;
1065    }
1066
1067    if (len > 0) {
1068        if (!_isScriptSubtag(buf, len)) {
1069            /* invalid script code */
1070            if (strict) {
1071                *status = U_ILLEGAL_ARGUMENT_ERROR;
1072            }
1073            return 0;
1074        } else {
1075            if (reslen < capacity) {
1076                *(appendAt + reslen) = SEP;
1077            }
1078            reslen++;
1079
1080            if (reslen < capacity) {
1081                uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1082            }
1083            reslen += len;
1084        }
1085    }
1086    u_terminateChars(appendAt, capacity, reslen, status);
1087    return reslen;
1088}
1089
1090static int32_t
1091_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1092    char buf[ULOC_COUNTRY_CAPACITY];
1093    UErrorCode tmpStatus = U_ZERO_ERROR;
1094    int32_t len;
1095    int32_t reslen = 0;
1096
1097    if (U_FAILURE(*status)) {
1098        return 0;
1099    }
1100
1101    len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
1102    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1103        if (strict) {
1104            *status = U_ILLEGAL_ARGUMENT_ERROR;
1105        }
1106        return 0;
1107    }
1108
1109    if (len > 0) {
1110        if (!_isRegionSubtag(buf, len)) {
1111            /* invalid region code */
1112            if (strict) {
1113                *status = U_ILLEGAL_ARGUMENT_ERROR;
1114            }
1115            return 0;
1116        } else {
1117            if (reslen < capacity) {
1118                *(appendAt + reslen) = SEP;
1119            }
1120            reslen++;
1121
1122            if (reslen < capacity) {
1123                uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1124            }
1125            reslen += len;
1126        }
1127    }
1128    u_terminateChars(appendAt, capacity, reslen, status);
1129    return reslen;
1130}
1131
1132static int32_t
1133_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1134    char buf[ULOC_FULLNAME_CAPACITY];
1135    UErrorCode tmpStatus = U_ZERO_ERROR;
1136    int32_t len, i;
1137    int32_t reslen = 0;
1138
1139    if (U_FAILURE(*status)) {
1140        return 0;
1141    }
1142
1143    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1144    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1145        if (strict) {
1146            *status = U_ILLEGAL_ARGUMENT_ERROR;
1147        }
1148        return 0;
1149    }
1150
1151    if (len > 0) {
1152        char *p, *pVar;
1153        UBool bNext = TRUE;
1154        VariantListEntry *var;
1155        VariantListEntry *varFirst = NULL;
1156
1157        pVar = NULL;
1158        p = buf;
1159        while (bNext) {
1160            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1161                if (*p == 0) {
1162                    bNext = FALSE;
1163                } else {
1164                    *p = 0; /* terminate */
1165                }
1166                if (pVar == NULL) {
1167                    if (strict) {
1168                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1169                        break;
1170                    }
1171                    /* ignore empty variant */
1172                } else {
1173                    /* ICU uses upper case letters for variants, but
1174                       the canonical format is lowercase in BCP47 */
1175                    for (i = 0; *(pVar + i) != 0; i++) {
1176                        *(pVar + i) = uprv_tolower(*(pVar + i));
1177                    }
1178
1179                    /* validate */
1180                    if (_isVariantSubtag(pVar, -1)) {
1181                        if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
1182                            /* emit the variant to the list */
1183                            var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1184                            if (var == NULL) {
1185                                *status = U_MEMORY_ALLOCATION_ERROR;
1186                                break;
1187                            }
1188                            var->variant = pVar;
1189                            if (!_addVariantToList(&varFirst, var)) {
1190                                /* duplicated variant */
1191                                uprv_free(var);
1192                                if (strict) {
1193                                    *status = U_ILLEGAL_ARGUMENT_ERROR;
1194                                    break;
1195                                }
1196                            }
1197                        } else {
1198                            /* Special handling for POSIX variant, need to remember that we had it and then */
1199                            /* treat it like an extension later. */
1200                            *hadPosix = TRUE;
1201                        }
1202                    } else if (strict) {
1203                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1204                        break;
1205                    } else if (_isPrivateuseValueSubtag(pVar, -1)) {
1206                        /* Handle private use subtags separately */
1207                        break;
1208                    }
1209                }
1210                /* reset variant starting position */
1211                pVar = NULL;
1212            } else if (pVar == NULL) {
1213                pVar = p;
1214            }
1215            p++;
1216        }
1217
1218        if (U_SUCCESS(*status)) {
1219            if (varFirst != NULL) {
1220                int32_t varLen;
1221
1222                /* write out validated/normalized variants to the target */
1223                var = varFirst;
1224                while (var != NULL) {
1225                    if (reslen < capacity) {
1226                        *(appendAt + reslen) = SEP;
1227                    }
1228                    reslen++;
1229                    varLen = (int32_t)uprv_strlen(var->variant);
1230                    if (reslen < capacity) {
1231                        uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
1232                    }
1233                    reslen += varLen;
1234                    var = var->next;
1235                }
1236            }
1237        }
1238
1239        /* clean up */
1240        var = varFirst;
1241        while (var != NULL) {
1242            VariantListEntry *tmpVar = var->next;
1243            uprv_free(var);
1244            var = tmpVar;
1245        }
1246
1247        if (U_FAILURE(*status)) {
1248            return 0;
1249        }
1250    }
1251
1252    u_terminateChars(appendAt, capacity, reslen, status);
1253    return reslen;
1254}
1255
1256static int32_t
1257_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1258    char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1259    char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
1260    int32_t attrBufLength = 0;
1261    UBool isAttribute = FALSE;
1262    UEnumeration *keywordEnum = NULL;
1263    int32_t reslen = 0;
1264
1265    keywordEnum = uloc_openKeywords(localeID, status);
1266    if (U_FAILURE(*status) && !hadPosix) {
1267        uenum_close(keywordEnum);
1268        return 0;
1269    }
1270    if (keywordEnum != NULL || hadPosix) {
1271        /* reorder extensions */
1272        int32_t len;
1273        const char *key;
1274        ExtensionListEntry *firstExt = NULL;
1275        ExtensionListEntry *ext;
1276        AttributeListEntry *firstAttr = NULL;
1277        AttributeListEntry *attr;
1278        char *attrValue;
1279        char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1280        char *pExtBuf = extBuf;
1281        int32_t extBufCapacity = sizeof(extBuf);
1282        const char *bcpKey, *bcpValue;
1283        UErrorCode tmpStatus = U_ZERO_ERROR;
1284        int32_t keylen;
1285        UBool isLDMLKeyword;
1286
1287        while (TRUE) {
1288            isAttribute = FALSE;
1289            key = uenum_next(keywordEnum, NULL, status);
1290            if (key == NULL) {
1291                break;
1292            }
1293            len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
1294            if (U_FAILURE(tmpStatus)) {
1295                if (strict) {
1296                    *status = U_ILLEGAL_ARGUMENT_ERROR;
1297                    break;
1298                }
1299                /* ignore this keyword */
1300                tmpStatus = U_ZERO_ERROR;
1301                continue;
1302            }
1303
1304            keylen = (int32_t)uprv_strlen(key);
1305            isLDMLKeyword = (keylen > 1);
1306
1307            /* special keyword used for representing Unicode locale attributes */
1308            if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
1309                isAttribute = TRUE;
1310                if (len > 0) {
1311                    int32_t i = 0;
1312                    while (TRUE) {
1313                        attrBufLength = 0;
1314                        for (; i < len; i++) {
1315                            if (buf[i] != '-') {
1316                                attrBuf[attrBufLength++] = buf[i];
1317                            } else {
1318                                i++;
1319                                break;
1320                            }
1321                        }
1322                        if (attrBufLength > 0) {
1323                            attrBuf[attrBufLength] = 0;
1324
1325                        } else if (i >= len){
1326                            break;
1327                        }
1328
1329                        /* create AttributeListEntry */
1330                        attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1331                        if (attr == NULL) {
1332                            *status = U_MEMORY_ALLOCATION_ERROR;
1333                            break;
1334                        }
1335                        attrValue = (char*)uprv_malloc(attrBufLength + 1);
1336                        if (attrValue == NULL) {
1337                            *status = U_MEMORY_ALLOCATION_ERROR;
1338                            break;
1339                        }
1340                        uprv_strcpy(attrValue, attrBuf);
1341                        attr->attribute = attrValue;
1342
1343                        if (!_addAttributeToList(&firstAttr, attr)) {
1344                            uprv_free(attr);
1345                            uprv_free(attrValue);
1346                            if (strict) {
1347                                *status = U_ILLEGAL_ARGUMENT_ERROR;
1348                                break;
1349                            }
1350                        }
1351                    }
1352                }
1353            } else if (isLDMLKeyword) {
1354                int32_t modKeyLen;
1355
1356                /* transform key and value to bcp47 style */
1357                modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
1358                if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1359                    if (strict) {
1360                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1361                        break;
1362                    }
1363                    tmpStatus = U_ZERO_ERROR;
1364                    continue;
1365                }
1366
1367                bcpKey = pExtBuf;
1368                pExtBuf += (modKeyLen + 1);
1369                extBufCapacity -= (modKeyLen + 1);
1370
1371                len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
1372                if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1373                    if (strict) {
1374                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1375                        break;
1376                    }
1377                    tmpStatus = U_ZERO_ERROR;
1378                    continue;
1379                }
1380                bcpValue = pExtBuf;
1381                pExtBuf += (len + 1);
1382                extBufCapacity -= (len + 1);
1383            } else {
1384                if (*key == PRIVATEUSE) {
1385                    if (!_isPrivateuseValueSubtags(buf, len)) {
1386                        if (strict) {
1387                            *status = U_ILLEGAL_ARGUMENT_ERROR;
1388                            break;
1389                        }
1390                        continue;
1391                    }
1392                } else {
1393                    if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1394                        if (strict) {
1395                            *status = U_ILLEGAL_ARGUMENT_ERROR;
1396                            break;
1397                        }
1398                        continue;
1399                    }
1400                }
1401                bcpKey = key;
1402                if ((len + 1) < extBufCapacity) {
1403                    uprv_memcpy(pExtBuf, buf, len);
1404                    bcpValue = pExtBuf;
1405
1406                    pExtBuf += len;
1407
1408                    *pExtBuf = 0;
1409                    pExtBuf++;
1410
1411                    extBufCapacity -= (len + 1);
1412                } else {
1413                    *status = U_ILLEGAL_ARGUMENT_ERROR;
1414                    break;
1415                }
1416            }
1417
1418            if (!isAttribute) {
1419                /* create ExtensionListEntry */
1420                ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1421                if (ext == NULL) {
1422                    *status = U_MEMORY_ALLOCATION_ERROR;
1423                    break;
1424                }
1425                ext->key = bcpKey;
1426                ext->value = bcpValue;
1427
1428                if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1429                    uprv_free(ext);
1430                    if (strict) {
1431                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1432                        break;
1433                    }
1434                }
1435            }
1436        }
1437
1438        /* Special handling for POSIX variant - add the keywords for POSIX */
1439        if (hadPosix) {
1440            /* create ExtensionListEntry for POSIX */
1441            ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1442            if (ext == NULL) {
1443                *status = U_MEMORY_ALLOCATION_ERROR;
1444                goto cleanup;
1445            }
1446            ext->key = POSIX_KEY;
1447            ext->value = POSIX_VALUE;
1448
1449            if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1450                uprv_free(ext);
1451            }
1452        }
1453
1454        if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1455            UBool startLDMLExtension = FALSE;
1456
1457            attr = firstAttr;
1458            ext = firstExt;
1459            do {
1460                if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1461                   /* write LDML singleton extension */
1462                   if (reslen < capacity) {
1463                       *(appendAt + reslen) = SEP;
1464                   }
1465                   reslen++;
1466                   if (reslen < capacity) {
1467                       *(appendAt + reslen) = LDMLEXT;
1468                   }
1469                   reslen++;
1470
1471                   startLDMLExtension = TRUE;
1472                }
1473
1474                /* write out the sorted BCP47 attributes, extensions and private use */
1475                if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1476                    if (reslen < capacity) {
1477                        *(appendAt + reslen) = SEP;
1478                    }
1479                    reslen++;
1480                    len = (int32_t)uprv_strlen(ext->key);
1481                    if (reslen < capacity) {
1482                        uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1483                    }
1484                    reslen += len;
1485                    if (reslen < capacity) {
1486                        *(appendAt + reslen) = SEP;
1487                    }
1488                    reslen++;
1489                    len = (int32_t)uprv_strlen(ext->value);
1490                    if (reslen < capacity) {
1491                        uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1492                    }
1493                    reslen += len;
1494
1495                    ext = ext->next;
1496                } else if (attr) {
1497                    /* write the value for the attributes */
1498                    if (reslen < capacity) {
1499                        *(appendAt + reslen) = SEP;
1500                    }
1501                    reslen++;
1502                    len = (int32_t)uprv_strlen(attr->attribute);
1503                    if (reslen < capacity) {
1504                        uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1505                    }
1506                    reslen += len;
1507
1508                    attr = attr->next;
1509                }
1510            } while (attr != NULL || ext != NULL);
1511        }
1512cleanup:
1513        /* clean up */
1514        ext = firstExt;
1515        while (ext != NULL) {
1516            ExtensionListEntry *tmpExt = ext->next;
1517            uprv_free(ext);
1518            ext = tmpExt;
1519        }
1520
1521        attr = firstAttr;
1522        while (attr != NULL) {
1523            AttributeListEntry *tmpAttr = attr->next;
1524            char *pValue = (char *)attr->attribute;
1525            uprv_free(pValue);
1526            uprv_free(attr);
1527            attr = tmpAttr;
1528        }
1529
1530        uenum_close(keywordEnum);
1531
1532        if (U_FAILURE(*status)) {
1533            return 0;
1534        }
1535    }
1536
1537    return u_terminateChars(appendAt, capacity, reslen, status);
1538}
1539
1540/**
1541 * Append keywords parsed from LDML extension value
1542 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1543 * Note: char* buf is used for storing keywords
1544 */
1545static void
1546_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1547    const char *pTag;   /* beginning of current subtag */
1548    const char *pKwds;  /* beginning of key-type pairs */
1549    UBool variantExists = *posixVariant;
1550
1551    ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
1552    ExtensionListEntry *kwd, *nextKwd;
1553
1554    AttributeListEntry *attrFirst = NULL;   /* first attribute */
1555    AttributeListEntry *attr, *nextAttr;
1556
1557    int32_t len;
1558    int32_t bufIdx = 0;
1559
1560    char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1561    int32_t attrBufIdx = 0;
1562
1563    /* Reset the posixVariant value */
1564    *posixVariant = FALSE;
1565
1566    pTag = ldmlext;
1567    pKwds = NULL;
1568
1569    /* Iterate through u extension attributes */
1570    while (*pTag) {
1571        /* locate next separator char */
1572        for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1573
1574        if (_isLDMLKey(pTag, len)) {
1575            pKwds = pTag;
1576            break;
1577        }
1578
1579        /* add this attribute to the list */
1580        attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1581        if (attr == NULL) {
1582            *status = U_MEMORY_ALLOCATION_ERROR;
1583            goto cleanup;
1584        }
1585
1586        if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1587            uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1588            attrBuf[attrBufIdx + len] = 0;
1589            attr->attribute = &attrBuf[attrBufIdx];
1590            attrBufIdx += (len + 1);
1591        } else {
1592            *status = U_ILLEGAL_ARGUMENT_ERROR;
1593            goto cleanup;
1594        }
1595
1596        if (!_addAttributeToList(&attrFirst, attr)) {
1597            *status = U_ILLEGAL_ARGUMENT_ERROR;
1598            uprv_free(attr);
1599            goto cleanup;
1600        }
1601
1602        /* next tag */
1603        pTag += len;
1604        if (*pTag) {
1605            /* next to the separator */
1606            pTag++;
1607        }
1608    }
1609
1610    if (attrFirst) {
1611        /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1612
1613        if (attrBufIdx > bufSize) {
1614            /* attrBufIdx == <total length of attribute subtag> + 1 */
1615            *status = U_ILLEGAL_ARGUMENT_ERROR;
1616            goto cleanup;
1617        }
1618
1619        kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1620        if (kwd == NULL) {
1621            *status = U_MEMORY_ALLOCATION_ERROR;
1622            goto cleanup;
1623        }
1624
1625        kwd->key = LOCALE_ATTRIBUTE_KEY;
1626        kwd->value = buf;
1627
1628        /* attribute subtags sorted in alphabetical order as type */
1629        attr = attrFirst;
1630        while (attr != NULL) {
1631            nextAttr = attr->next;
1632
1633            /* buffer size check is done above */
1634            if (attr != attrFirst) {
1635                *(buf + bufIdx) = SEP;
1636                bufIdx++;
1637            }
1638
1639            len = uprv_strlen(attr->attribute);
1640            uprv_memcpy(buf + bufIdx, attr->attribute, len);
1641            bufIdx += len;
1642
1643            attr = nextAttr;
1644        }
1645        *(buf + bufIdx) = 0;
1646        bufIdx++;
1647
1648        if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1649            *status = U_ILLEGAL_ARGUMENT_ERROR;
1650            uprv_free(kwd);
1651            goto cleanup;
1652        }
1653
1654        /* once keyword entry is created, delete the attribute list */
1655        attr = attrFirst;
1656        while (attr != NULL) {
1657            nextAttr = attr->next;
1658            uprv_free(attr);
1659            attr = nextAttr;
1660        }
1661        attrFirst = NULL;
1662    }
1663
1664    if (pKwds) {
1665        const char *pBcpKey = NULL;     /* u extenstion key subtag */
1666        const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
1667        int32_t bcpKeyLen = 0;
1668        int32_t bcpTypeLen = 0;
1669        UBool isDone = FALSE;
1670
1671        pTag = pKwds;
1672        /* BCP47 representation of LDML key/type pairs */
1673        while (!isDone) {
1674            const char *pNextBcpKey = NULL;
1675            int32_t nextBcpKeyLen;
1676            UBool emitKeyword = FALSE;
1677
1678            if (*pTag) {
1679                /* locate next separator char */
1680                for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1681
1682                if (_isLDMLKey(pTag, len)) {
1683                    if (pBcpKey) {
1684                        emitKeyword = TRUE;
1685                        pNextBcpKey = pTag;
1686                        nextBcpKeyLen = len;
1687                    } else {
1688                        pBcpKey = pTag;
1689                        bcpKeyLen = len;
1690                    }
1691                } else {
1692                    U_ASSERT(pBcpKey != NULL);
1693                    /* within LDML type subtags */
1694                    if (pBcpType) {
1695                        bcpTypeLen += (len + 1);
1696                    } else {
1697                        pBcpType = pTag;
1698                        bcpTypeLen = len;
1699                    }
1700                }
1701
1702                /* next tag */
1703                pTag += len;
1704                if (*pTag) {
1705                    /* next to the separator */
1706                    pTag++;
1707                }
1708            } else {
1709                /* processing last one */
1710                emitKeyword = TRUE;
1711                isDone = TRUE;
1712            }
1713
1714            if (emitKeyword) {
1715                const char *pKey = NULL;    /* LDML key */
1716                const char *pType = NULL;   /* LDML type */
1717
1718                U_ASSERT(pBcpKey != NULL);
1719
1720                /* u extension key to LDML key */
1721                len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1722                if (U_FAILURE(*status)) {
1723                    goto cleanup;
1724                }
1725                pKey = buf + bufIdx;
1726                bufIdx += len;
1727                *(buf + bufIdx) = 0;
1728                bufIdx++;
1729
1730                if (pBcpType) {
1731                    /* BCP type to locale type */
1732                    len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1733                    if (U_FAILURE(*status)) {
1734                        goto cleanup;
1735                    }
1736                    pType = buf + bufIdx;
1737                    bufIdx += len;
1738                    *(buf + bufIdx) = 0;
1739                    bufIdx++;
1740                } else {
1741                    /* typeless - default type value is "yes" */
1742                    pType = LOCALE_TYPE_YES;
1743                }
1744
1745                /* Special handling for u-va-posix, since we want to treat this as a variant,
1746                   not as a keyword */
1747                if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1748                    *posixVariant = TRUE;
1749                } else {
1750                    /* create an ExtensionListEntry for this keyword */
1751                    kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1752                    if (kwd == NULL) {
1753                        *status = U_MEMORY_ALLOCATION_ERROR;
1754                        goto cleanup;
1755                    }
1756
1757                    kwd->key = pKey;
1758                    kwd->value = pType;
1759
1760                    if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1761                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1762                        uprv_free(kwd);
1763                        goto cleanup;
1764                    }
1765                }
1766
1767                pBcpKey = pNextBcpKey;
1768                bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1769                pBcpType = NULL;
1770                bcpTypeLen = 0;
1771            }
1772        }
1773    }
1774
1775    kwd = kwdFirst;
1776    while (kwd != NULL) {
1777        nextKwd = kwd->next;
1778        _addExtensionToList(appendTo, kwd, FALSE);
1779        kwd = nextKwd;
1780    }
1781
1782    return;
1783
1784cleanup:
1785    attr = attrFirst;
1786    while (attr != NULL) {
1787        nextAttr = attr->next;
1788        uprv_free(attr);
1789        attr = nextAttr;
1790    }
1791
1792    kwd = kwdFirst;
1793    while (kwd != NULL) {
1794        nextKwd = kwd->next;
1795        uprv_free(kwd);
1796        kwd = nextKwd;
1797    }
1798}
1799
1800
1801static int32_t
1802_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1803    int32_t reslen = 0;
1804    int32_t i, n;
1805    int32_t len;
1806    ExtensionListEntry *kwdFirst = NULL;
1807    ExtensionListEntry *kwd;
1808    const char *key, *type;
1809    char *kwdBuf = NULL;
1810    int32_t kwdBufLength = capacity;
1811    UBool posixVariant = FALSE;
1812
1813    if (U_FAILURE(*status)) {
1814        return 0;
1815    }
1816
1817    kwdBuf = (char*)uprv_malloc(kwdBufLength);
1818    if (kwdBuf == NULL) {
1819        *status = U_MEMORY_ALLOCATION_ERROR;
1820        return 0;
1821    }
1822
1823    /* Determine if variants already exists */
1824    if (ultag_getVariantsSize(langtag)) {
1825        posixVariant = TRUE;
1826    }
1827
1828    n = ultag_getExtensionsSize(langtag);
1829
1830    /* resolve locale keywords and reordering keys */
1831    for (i = 0; i < n; i++) {
1832        key = ultag_getExtensionKey(langtag, i);
1833        type = ultag_getExtensionValue(langtag, i);
1834        if (*key == LDMLEXT) {
1835            _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1836            if (U_FAILURE(*status)) {
1837                break;
1838            }
1839        } else {
1840            kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1841            if (kwd == NULL) {
1842                *status = U_MEMORY_ALLOCATION_ERROR;
1843                break;
1844            }
1845            kwd->key = key;
1846            kwd->value = type;
1847            if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1848                uprv_free(kwd);
1849                *status = U_ILLEGAL_ARGUMENT_ERROR;
1850                break;
1851            }
1852        }
1853    }
1854
1855    if (U_SUCCESS(*status)) {
1856        type = ultag_getPrivateUse(langtag);
1857        if ((int32_t)uprv_strlen(type) > 0) {
1858            /* add private use as a keyword */
1859            kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1860            if (kwd == NULL) {
1861                *status = U_MEMORY_ALLOCATION_ERROR;
1862            } else {
1863                kwd->key = PRIVATEUSE_KEY;
1864                kwd->value = type;
1865                if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1866                    uprv_free(kwd);
1867                    *status = U_ILLEGAL_ARGUMENT_ERROR;
1868                }
1869            }
1870        }
1871    }
1872
1873    /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1874
1875    if (U_SUCCESS(*status) && posixVariant) {
1876        len = (int32_t) uprv_strlen(_POSIX);
1877        if (reslen < capacity) {
1878            uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1879        }
1880        reslen += len;
1881    }
1882
1883    if (U_SUCCESS(*status) && kwdFirst != NULL) {
1884        /* write out the sorted keywords */
1885        UBool firstValue = TRUE;
1886        kwd = kwdFirst;
1887        do {
1888            if (reslen < capacity) {
1889                if (firstValue) {
1890                    /* '@' */
1891                    *(appendAt + reslen) = LOCALE_EXT_SEP;
1892                    firstValue = FALSE;
1893                } else {
1894                    /* ';' */
1895                    *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1896                }
1897            }
1898            reslen++;
1899
1900            /* key */
1901            len = (int32_t)uprv_strlen(kwd->key);
1902            if (reslen < capacity) {
1903                uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1904            }
1905            reslen += len;
1906
1907            /* '=' */
1908            if (reslen < capacity) {
1909                *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1910            }
1911            reslen++;
1912
1913            /* type */
1914            len = (int32_t)uprv_strlen(kwd->value);
1915            if (reslen < capacity) {
1916                uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1917            }
1918            reslen += len;
1919
1920            kwd = kwd->next;
1921        } while (kwd);
1922    }
1923
1924    /* clean up */
1925    kwd = kwdFirst;
1926    while (kwd != NULL) {
1927        ExtensionListEntry *tmpKwd = kwd->next;
1928        uprv_free(kwd);
1929        kwd = tmpKwd;
1930    }
1931
1932    uprv_free(kwdBuf);
1933
1934    if (U_FAILURE(*status)) {
1935        return 0;
1936    }
1937
1938    return u_terminateChars(appendAt, capacity, reslen, status);
1939}
1940
1941static int32_t
1942_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1943    char buf[ULOC_FULLNAME_CAPACITY];
1944    char tmpAppend[ULOC_FULLNAME_CAPACITY];
1945    UErrorCode tmpStatus = U_ZERO_ERROR;
1946    int32_t len, i;
1947    int32_t reslen = 0;
1948
1949    if (U_FAILURE(*status)) {
1950        return 0;
1951    }
1952
1953    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1954    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1955        if (strict) {
1956            *status = U_ILLEGAL_ARGUMENT_ERROR;
1957        }
1958        return 0;
1959    }
1960
1961    if (len > 0) {
1962        char *p, *pPriv;
1963        UBool bNext = TRUE;
1964        UBool firstValue = TRUE;
1965        UBool writeValue;
1966
1967        pPriv = NULL;
1968        p = buf;
1969        while (bNext) {
1970            writeValue = FALSE;
1971            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1972                if (*p == 0) {
1973                    bNext = FALSE;
1974                } else {
1975                    *p = 0; /* terminate */
1976                }
1977                if (pPriv != NULL) {
1978                    /* Private use in the canonical format is lowercase in BCP47 */
1979                    for (i = 0; *(pPriv + i) != 0; i++) {
1980                        *(pPriv + i) = uprv_tolower(*(pPriv + i));
1981                    }
1982
1983                    /* validate */
1984                    if (_isPrivateuseValueSubtag(pPriv, -1)) {
1985                        if (firstValue) {
1986                            if (!_isVariantSubtag(pPriv, -1)) {
1987                                writeValue = TRUE;
1988                            }
1989                        } else {
1990                            writeValue = TRUE;
1991                        }
1992                    } else if (strict) {
1993                        *status = U_ILLEGAL_ARGUMENT_ERROR;
1994                        break;
1995                    } else {
1996                        break;
1997                    }
1998
1999                    if (writeValue) {
2000                        if (reslen < capacity) {
2001                            tmpAppend[reslen++] = SEP;
2002                        }
2003
2004                        if (firstValue) {
2005                            if (reslen < capacity) {
2006                                tmpAppend[reslen++] = *PRIVATEUSE_KEY;
2007                            }
2008
2009                            if (reslen < capacity) {
2010                                tmpAppend[reslen++] = SEP;
2011                            }
2012
2013                            len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
2014                            if (reslen < capacity) {
2015                                uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
2016                            }
2017                            reslen += len;
2018
2019                            if (reslen < capacity) {
2020                                tmpAppend[reslen++] = SEP;
2021                            }
2022
2023                            firstValue = FALSE;
2024                        }
2025
2026                        len = (int32_t)uprv_strlen(pPriv);
2027                        if (reslen < capacity) {
2028                            uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
2029                        }
2030                        reslen += len;
2031                    }
2032                }
2033                /* reset private use starting position */
2034                pPriv = NULL;
2035            } else if (pPriv == NULL) {
2036                pPriv = p;
2037            }
2038            p++;
2039        }
2040
2041        if (U_FAILURE(*status)) {
2042            return 0;
2043        }
2044    }
2045
2046    if (U_SUCCESS(*status)) {
2047        len = reslen;
2048        if (reslen < capacity) {
2049            uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
2050        }
2051    }
2052
2053    u_terminateChars(appendAt, capacity, reslen, status);
2054
2055    return reslen;
2056}
2057
2058/*
2059* -------------------------------------------------
2060*
2061* ultag_ functions
2062*
2063* -------------------------------------------------
2064*/
2065
2066/* Bit flags used by the parser */
2067#define LANG 0x0001
2068#define EXTL 0x0002
2069#define SCRT 0x0004
2070#define REGN 0x0008
2071#define VART 0x0010
2072#define EXTS 0x0020
2073#define EXTV 0x0040
2074#define PRIV 0x0080
2075
2076static ULanguageTag*
2077ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
2078    ULanguageTag *t;
2079    char *tagBuf;
2080    int16_t next;
2081    char *pSubtag, *pNext, *pLastGoodPosition;
2082    int32_t subtagLen;
2083    int32_t extlangIdx;
2084    ExtensionListEntry *pExtension;
2085    char *pExtValueSubtag, *pExtValueSubtagEnd;
2086    int32_t i;
2087    UBool privateuseVar = FALSE;
2088    int32_t grandfatheredLen = 0;
2089
2090    if (parsedLen != NULL) {
2091        *parsedLen = 0;
2092    }
2093
2094    if (U_FAILURE(*status)) {
2095        return NULL;
2096    }
2097
2098    if (tagLen < 0) {
2099        tagLen = (int32_t)uprv_strlen(tag);
2100    }
2101
2102    /* copy the entire string */
2103    tagBuf = (char*)uprv_malloc(tagLen + 1);
2104    if (tagBuf == NULL) {
2105        *status = U_MEMORY_ALLOCATION_ERROR;
2106        return NULL;
2107    }
2108    uprv_memcpy(tagBuf, tag, tagLen);
2109    *(tagBuf + tagLen) = 0;
2110
2111    /* create a ULanguageTag */
2112    t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
2113    if (t == NULL) {
2114        uprv_free(tagBuf);
2115        *status = U_MEMORY_ALLOCATION_ERROR;
2116        return NULL;
2117    }
2118    _initializeULanguageTag(t);
2119    t->buf = tagBuf;
2120
2121    if (tagLen < MINLEN) {
2122        /* the input tag is too short - return empty ULanguageTag */
2123        return t;
2124    }
2125
2126    /* check if the tag is grandfathered */
2127    for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
2128        if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
2129            int32_t newTagLength;
2130
2131            grandfatheredLen = tagLen;  /* back up for output parsedLen */
2132            newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
2133            if (tagLen < newTagLength) {
2134                uprv_free(tagBuf);
2135                tagBuf = (char*)uprv_malloc(newTagLength + 1);
2136                if (tagBuf == NULL) {
2137                    *status = U_MEMORY_ALLOCATION_ERROR;
2138                    return NULL;
2139                }
2140                t->buf = tagBuf;
2141                tagLen = newTagLength;
2142            }
2143            uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
2144            break;
2145        }
2146    }
2147
2148    /*
2149     * langtag      =   language
2150     *                  ["-" script]
2151     *                  ["-" region]
2152     *                  *("-" variant)
2153     *                  *("-" extension)
2154     *                  ["-" privateuse]
2155     */
2156
2157    next = LANG | PRIV;
2158    pNext = pLastGoodPosition = tagBuf;
2159    extlangIdx = 0;
2160    pExtension = NULL;
2161    pExtValueSubtag = NULL;
2162    pExtValueSubtagEnd = NULL;
2163
2164    while (pNext) {
2165        char *pSep;
2166
2167        pSubtag = pNext;
2168
2169        /* locate next separator char */
2170        pSep = pSubtag;
2171        while (*pSep) {
2172            if (*pSep == SEP) {
2173                break;
2174            }
2175            pSep++;
2176        }
2177        if (*pSep == 0) {
2178            /* last subtag */
2179            pNext = NULL;
2180        } else {
2181            pNext = pSep + 1;
2182        }
2183        subtagLen = (int32_t)(pSep - pSubtag);
2184
2185        if (next & LANG) {
2186            if (_isLanguageSubtag(pSubtag, subtagLen)) {
2187                *pSep = 0;  /* terminate */
2188                t->language = T_CString_toLowerCase(pSubtag);
2189
2190                pLastGoodPosition = pSep;
2191                next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2192                continue;
2193            }
2194        }
2195        if (next & EXTL) {
2196            if (_isExtlangSubtag(pSubtag, subtagLen)) {
2197                *pSep = 0;
2198                t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
2199
2200                pLastGoodPosition = pSep;
2201                if (extlangIdx < 3) {
2202                    next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2203                } else {
2204                    next = SCRT | REGN | VART | EXTS | PRIV;
2205                }
2206                continue;
2207            }
2208        }
2209        if (next & SCRT) {
2210            if (_isScriptSubtag(pSubtag, subtagLen)) {
2211                char *p = pSubtag;
2212
2213                *pSep = 0;
2214
2215                /* to title case */
2216                *p = uprv_toupper(*p);
2217                p++;
2218                for (; *p; p++) {
2219                    *p = uprv_tolower(*p);
2220                }
2221
2222                t->script = pSubtag;
2223
2224                pLastGoodPosition = pSep;
2225                next = REGN | VART | EXTS | PRIV;
2226                continue;
2227            }
2228        }
2229        if (next & REGN) {
2230            if (_isRegionSubtag(pSubtag, subtagLen)) {
2231                *pSep = 0;
2232                t->region = T_CString_toUpperCase(pSubtag);
2233
2234                pLastGoodPosition = pSep;
2235                next = VART | EXTS | PRIV;
2236                continue;
2237            }
2238        }
2239        if (next & VART) {
2240            if (_isVariantSubtag(pSubtag, subtagLen) ||
2241               (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
2242                VariantListEntry *var;
2243                UBool isAdded;
2244
2245                var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
2246                if (var == NULL) {
2247                    *status = U_MEMORY_ALLOCATION_ERROR;
2248                    goto error;
2249                }
2250                *pSep = 0;
2251                var->variant = T_CString_toUpperCase(pSubtag);
2252                isAdded = _addVariantToList(&(t->variants), var);
2253                if (!isAdded) {
2254                    /* duplicated variant entry */
2255                    uprv_free(var);
2256                    break;
2257                }
2258                pLastGoodPosition = pSep;
2259                next = VART | EXTS | PRIV;
2260                continue;
2261            }
2262        }
2263        if (next & EXTS) {
2264            if (_isExtensionSingleton(pSubtag, subtagLen)) {
2265                if (pExtension != NULL) {
2266                    if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2267                        /* the previous extension is incomplete */
2268                        uprv_free(pExtension);
2269                        pExtension = NULL;
2270                        break;
2271                    }
2272
2273                    /* terminate the previous extension value */
2274                    *pExtValueSubtagEnd = 0;
2275                    pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2276
2277                    /* insert the extension to the list */
2278                    if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2279                        pLastGoodPosition = pExtValueSubtagEnd;
2280                    } else {
2281                        /* stop parsing here */
2282                        uprv_free(pExtension);
2283                        pExtension = NULL;
2284                        break;
2285                    }
2286                }
2287
2288                /* create a new extension */
2289                pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2290                if (pExtension == NULL) {
2291                    *status = U_MEMORY_ALLOCATION_ERROR;
2292                    goto error;
2293                }
2294                *pSep = 0;
2295                pExtension->key = T_CString_toLowerCase(pSubtag);
2296                pExtension->value = NULL;   /* will be set later */
2297
2298                /*
2299                 * reset the start and the end location of extension value
2300                 * subtags for this extension
2301                 */
2302                pExtValueSubtag = NULL;
2303                pExtValueSubtagEnd = NULL;
2304
2305                next = EXTV;
2306                continue;
2307            }
2308        }
2309        if (next & EXTV) {
2310            if (_isExtensionSubtag(pSubtag, subtagLen)) {
2311                if (pExtValueSubtag == NULL) {
2312                    /* if the start postion of this extension's value is not yet,
2313                        this one is the first value subtag */
2314                    pExtValueSubtag = pSubtag;
2315                }
2316
2317                /* Mark the end of this subtag */
2318                pExtValueSubtagEnd = pSep;
2319                next = EXTS | EXTV | PRIV;
2320
2321                continue;
2322            }
2323        }
2324        if (next & PRIV) {
2325            if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2326                char *pPrivuseVal;
2327
2328                if (pExtension != NULL) {
2329                    /* Process the last extension */
2330                    if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2331                        /* the previous extension is incomplete */
2332                        uprv_free(pExtension);
2333                        pExtension = NULL;
2334                        break;
2335                    } else {
2336                        /* terminate the previous extension value */
2337                        *pExtValueSubtagEnd = 0;
2338                        pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2339
2340                        /* insert the extension to the list */
2341                        if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2342                            pLastGoodPosition = pExtValueSubtagEnd;
2343                            pExtension = NULL;
2344                        } else {
2345                        /* stop parsing here */
2346                            uprv_free(pExtension);
2347                            pExtension = NULL;
2348                            break;
2349                        }
2350                    }
2351                }
2352
2353                /* The rest of part will be private use value subtags */
2354                if (pNext == NULL) {
2355                    /* empty private use subtag */
2356                    break;
2357                }
2358                /* back up the private use value start position */
2359                pPrivuseVal = pNext;
2360
2361                /* validate private use value subtags */
2362                while (pNext) {
2363                    pSubtag = pNext;
2364                    pSep = pSubtag;
2365                    while (*pSep) {
2366                        if (*pSep == SEP) {
2367                            break;
2368                        }
2369                        pSep++;
2370                    }
2371                    if (*pSep == 0) {
2372                        /* last subtag */
2373                        pNext = NULL;
2374                    } else {
2375                        pNext = pSep + 1;
2376                    }
2377                    subtagLen = (int32_t)(pSep - pSubtag);
2378
2379                    if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2380                        *pSep = 0;
2381                        next = VART;
2382                        privateuseVar = TRUE;
2383                        break;
2384                    } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2385                        pLastGoodPosition = pSep;
2386                    } else {
2387                        break;
2388                    }
2389                }
2390
2391                if (next == VART) {
2392                    continue;
2393                }
2394
2395                if (pLastGoodPosition - pPrivuseVal > 0) {
2396                    *pLastGoodPosition = 0;
2397                    t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2398                }
2399                /* No more subtags, exiting the parse loop */
2400                break;
2401            }
2402            break;
2403        }
2404
2405        /* If we fell through here, it means this subtag is illegal - quit parsing */
2406        break;
2407    }
2408
2409    if (pExtension != NULL) {
2410        /* Process the last extension */
2411        if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2412            /* the previous extension is incomplete */
2413            uprv_free(pExtension);
2414        } else {
2415            /* terminate the previous extension value */
2416            *pExtValueSubtagEnd = 0;
2417            pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2418            /* insert the extension to the list */
2419            if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2420                pLastGoodPosition = pExtValueSubtagEnd;
2421            } else {
2422                uprv_free(pExtension);
2423            }
2424        }
2425    }
2426
2427    if (parsedLen != NULL) {
2428        *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2429    }
2430
2431    return t;
2432
2433error:
2434    uprv_free(t);
2435    return NULL;
2436}
2437
2438static void
2439ultag_close(ULanguageTag* langtag) {
2440
2441    if (langtag == NULL) {
2442        return;
2443    }
2444
2445    uprv_free(langtag->buf);
2446
2447    if (langtag->variants) {
2448        VariantListEntry *curVar = langtag->variants;
2449        while (curVar) {
2450            VariantListEntry *nextVar = curVar->next;
2451            uprv_free(curVar);
2452            curVar = nextVar;
2453        }
2454    }
2455
2456    if (langtag->extensions) {
2457        ExtensionListEntry *curExt = langtag->extensions;
2458        while (curExt) {
2459            ExtensionListEntry *nextExt = curExt->next;
2460            uprv_free(curExt);
2461            curExt = nextExt;
2462        }
2463    }
2464
2465    uprv_free(langtag);
2466}
2467
2468static const char*
2469ultag_getLanguage(const ULanguageTag* langtag) {
2470    return langtag->language;
2471}
2472
#if 0
/*
 * Disabled code: looks the tag's language up in the DEPRECATEDLANGS pair
 * table and returns the second element of the matching pair, or the tag's
 * own language when no pair matches.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pair = 0;

    while (DEPRECATEDLANGS[pair] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pair], langtag->language) == 0) {
            return DEPRECATEDLANGS[pair + 1];
        }
        pair += 2;
    }
    return langtag->language;
}
#endif
2485
2486static const char*
2487ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2488    if (idx >= 0 && idx < MAXEXTLANG) {
2489        return langtag->extlang[idx];
2490    }
2491    return NULL;
2492}
2493
2494static int32_t
2495ultag_getExtlangSize(const ULanguageTag* langtag) {
2496    int32_t size = 0;
2497    int32_t i;
2498    for (i = 0; i < MAXEXTLANG; i++) {
2499        if (langtag->extlang[i]) {
2500            size++;
2501        }
2502    }
2503    return size;
2504}
2505
2506static const char*
2507ultag_getScript(const ULanguageTag* langtag) {
2508    return langtag->script;
2509}
2510
2511static const char*
2512ultag_getRegion(const ULanguageTag* langtag) {
2513    return langtag->region;
2514}
2515
2516static const char*
2517ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2518    const char *var = NULL;
2519    VariantListEntry *cur = langtag->variants;
2520    int32_t i = 0;
2521    while (cur) {
2522        if (i == idx) {
2523            var = cur->variant;
2524            break;
2525        }
2526        cur = cur->next;
2527        i++;
2528    }
2529    return var;
2530}
2531
2532static int32_t
2533ultag_getVariantsSize(const ULanguageTag* langtag) {
2534    int32_t size = 0;
2535    VariantListEntry *cur = langtag->variants;
2536    while (TRUE) {
2537        if (cur == NULL) {
2538            break;
2539        }
2540        size++;
2541        cur = cur->next;
2542    }
2543    return size;
2544}
2545
2546static const char*
2547ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2548    const char *key = NULL;
2549    ExtensionListEntry *cur = langtag->extensions;
2550    int32_t i = 0;
2551    while (cur) {
2552        if (i == idx) {
2553            key = cur->key;
2554            break;
2555        }
2556        cur = cur->next;
2557        i++;
2558    }
2559    return key;
2560}
2561
2562static const char*
2563ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2564    const char *val = NULL;
2565    ExtensionListEntry *cur = langtag->extensions;
2566    int32_t i = 0;
2567    while (cur) {
2568        if (i == idx) {
2569            val = cur->value;
2570            break;
2571        }
2572        cur = cur->next;
2573        i++;
2574    }
2575    return val;
2576}
2577
2578static int32_t
2579ultag_getExtensionsSize(const ULanguageTag* langtag) {
2580    int32_t size = 0;
2581    ExtensionListEntry *cur = langtag->extensions;
2582    while (TRUE) {
2583        if (cur == NULL) {
2584            break;
2585        }
2586        size++;
2587        cur = cur->next;
2588    }
2589    return size;
2590}
2591
2592static const char*
2593ultag_getPrivateUse(const ULanguageTag* langtag) {
2594    return langtag->privateuse;
2595}
2596
#if 0
/* Disabled code: returns the grandfathered field of the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;

    return grandfathered;
}
#endif
2603
2604
2605/*
2606* -------------------------------------------------
2607*
2608* Locale/BCP47 conversion APIs, exposed as uloc_*
2609*
2610* -------------------------------------------------
2611*/
2612U_CAPI int32_t U_EXPORT2
2613uloc_toLanguageTag(const char* localeID,
2614                   char* langtag,
2615                   int32_t langtagCapacity,
2616                   UBool strict,
2617                   UErrorCode* status) {
2618    /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2619    char canonical[256];
2620    int32_t reslen = 0;
2621    UErrorCode tmpStatus = U_ZERO_ERROR;
2622    UBool hadPosix = FALSE;
2623    const char* pKeywordStart;
2624
2625    /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
2626    canonical[0] = 0;
2627    if (uprv_strlen(localeID) > 0) {
2628        uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2629        if (tmpStatus != U_ZERO_ERROR) {
2630            *status = U_ILLEGAL_ARGUMENT_ERROR;
2631            return 0;
2632        }
2633    }
2634
2635    /* For handling special case - private use only tag */
2636    pKeywordStart = locale_getKeywordsStart(canonical);
2637    if (pKeywordStart == canonical) {
2638        UEnumeration *kwdEnum;
2639        int kwdCnt = 0;
2640        UBool done = FALSE;
2641
2642        kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2643        if (kwdEnum != NULL) {
2644            kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2645            if (kwdCnt == 1) {
2646                const char *key;
2647                int32_t len = 0;
2648
2649                key = uenum_next(kwdEnum, &len, &tmpStatus);
2650                if (len == 1 && *key == PRIVATEUSE) {
2651                    char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2652                    buf[0] = PRIVATEUSE;
2653                    buf[1] = SEP;
2654                    len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2655                    if (U_SUCCESS(tmpStatus)) {
2656                        if (_isPrivateuseValueSubtags(&buf[2], len)) {
2657                            /* return private use only tag */
2658                            reslen = len + 2;
2659                            uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2660                            u_terminateChars(langtag, langtagCapacity, reslen, status);
2661                            done = TRUE;
2662                        } else if (strict) {
2663                            *status = U_ILLEGAL_ARGUMENT_ERROR;
2664                            done = TRUE;
2665                        }
2666                        /* if not strict mode, then "und" will be returned */
2667                    } else {
2668                        *status = U_ILLEGAL_ARGUMENT_ERROR;
2669                        done = TRUE;
2670                    }
2671                }
2672            }
2673            uenum_close(kwdEnum);
2674            if (done) {
2675                return reslen;
2676            }
2677        }
2678    }
2679
2680    reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2681    reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2682    reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2683    reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2684    reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2685    reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2686
2687    return reslen;
2688}
2689
2690
2691U_CAPI int32_t U_EXPORT2
2692uloc_forLanguageTag(const char* langtag,
2693                    char* localeID,
2694                    int32_t localeIDCapacity,
2695                    int32_t* parsedLength,
2696                    UErrorCode* status) {
2697    ULanguageTag *lt;
2698    int32_t reslen = 0;
2699    const char *subtag, *p;
2700    int32_t len;
2701    int32_t i, n;
2702    UBool noRegion = TRUE;
2703
2704    lt = ultag_parse(langtag, -1, parsedLength, status);
2705    if (U_FAILURE(*status)) {
2706        return 0;
2707    }
2708
2709    /* language */
2710    subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2711    if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2712        len = (int32_t)uprv_strlen(subtag);
2713        if (len > 0) {
2714            if (reslen < localeIDCapacity) {
2715                uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2716            }
2717            reslen += len;
2718        }
2719    }
2720
2721    /* script */
2722    subtag = ultag_getScript(lt);
2723    len = (int32_t)uprv_strlen(subtag);
2724    if (len > 0) {
2725        if (reslen < localeIDCapacity) {
2726            *(localeID + reslen) = LOCALE_SEP;
2727        }
2728        reslen++;
2729
2730        /* write out the script in title case */
2731        p = subtag;
2732        while (*p) {
2733            if (reslen < localeIDCapacity) {
2734                if (p == subtag) {
2735                    *(localeID + reslen) = uprv_toupper(*p);
2736                } else {
2737                    *(localeID + reslen) = *p;
2738                }
2739            }
2740            reslen++;
2741            p++;
2742        }
2743    }
2744
2745    /* region */
2746    subtag = ultag_getRegion(lt);
2747    len = (int32_t)uprv_strlen(subtag);
2748    if (len > 0) {
2749        if (reslen < localeIDCapacity) {
2750            *(localeID + reslen) = LOCALE_SEP;
2751        }
2752        reslen++;
2753        /* write out the retion in upper case */
2754        p = subtag;
2755        while (*p) {
2756            if (reslen < localeIDCapacity) {
2757                *(localeID + reslen) = uprv_toupper(*p);
2758            }
2759            reslen++;
2760            p++;
2761        }
2762        noRegion = FALSE;
2763    }
2764
2765    /* variants */
2766    n = ultag_getVariantsSize(lt);
2767    if (n > 0) {
2768        if (noRegion) {
2769            if (reslen < localeIDCapacity) {
2770                *(localeID + reslen) = LOCALE_SEP;
2771            }
2772            reslen++;
2773        }
2774
2775        for (i = 0; i < n; i++) {
2776            subtag = ultag_getVariant(lt, i);
2777            if (reslen < localeIDCapacity) {
2778                *(localeID + reslen) = LOCALE_SEP;
2779            }
2780            reslen++;
2781            /* write out the variant in upper case */
2782            p = subtag;
2783            while (*p) {
2784                if (reslen < localeIDCapacity) {
2785                    *(localeID + reslen) = uprv_toupper(*p);
2786                }
2787                reslen++;
2788                p++;
2789            }
2790        }
2791    }
2792
2793    /* keywords */
2794    n = ultag_getExtensionsSize(lt);
2795    subtag = ultag_getPrivateUse(lt);
2796    if (n > 0 || uprv_strlen(subtag) > 0) {
2797        if (reslen == 0 && n > 0) {
2798            /* need a language */
2799            if (reslen < localeIDCapacity) {
2800                uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2801            }
2802            reslen += LANG_UND_LEN;
2803        }
2804        len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2805        reslen += len;
2806    }
2807
2808    ultag_close(lt);
2809    return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2810}
2811
2812
2813