1/* 2********************************************************************** 3* Copyright (C) 2009-2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6*/ 7 8#include "unicode/utypes.h" 9#include "unicode/ures.h" 10#include "unicode/putil.h" 11#include "unicode/uloc.h" 12#include "ustr_imp.h" 13#include "cmemory.h" 14#include "cstring.h" 15#include "putilimp.h" 16#include "uinvchar.h" 17#include "ulocimp.h" 18#include "uassert.h" 19 20/* struct holding a single variant */ 21typedef struct VariantListEntry { 22 const char *variant; 23 struct VariantListEntry *next; 24} VariantListEntry; 25 26/* struct holding a single attribute value */ 27typedef struct AttributeListEntry { 28 const char *attribute; 29 struct AttributeListEntry *next; 30} AttributeListEntry; 31 32/* struct holding a single extension */ 33typedef struct ExtensionListEntry { 34 const char *key; 35 const char *value; 36 struct ExtensionListEntry *next; 37} ExtensionListEntry; 38 39#define MAXEXTLANG 3 40typedef struct ULanguageTag { 41 char *buf; /* holding parsed subtags */ 42 const char *language; 43 const char *extlang[MAXEXTLANG]; 44 const char *script; 45 const char *region; 46 VariantListEntry *variants; 47 ExtensionListEntry *extensions; 48 const char *privateuse; 49 const char *grandfathered; 50} ULanguageTag; 51 52#define MINLEN 2 53#define SEP '-' 54#define PRIVATEUSE 'x' 55#define LDMLEXT 'u' 56 57#define LOCALE_SEP '_' 58#define LOCALE_EXT_SEP '@' 59#define LOCALE_KEYWORD_SEP ';' 60#define LOCALE_KEY_TYPE_SEP '=' 61 62#define ISALPHA(c) uprv_isASCIILetter(c) 63#define ISNUMERIC(c) ((c)>='0' && (c)<='9') 64 65static const char EMPTY[] = ""; 66static const char LANG_UND[] = "und"; 67static const char PRIVATEUSE_KEY[] = "x"; 68static const char _POSIX[] = "_POSIX"; 69static const char POSIX_KEY[] = "va"; 70static const char POSIX_VALUE[] = "posix"; 71static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; 72static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; 73static const char LOCALE_TYPE_YES[] = "yes"; 74 75#define LANG_UND_LEN 3 76 77static const char* const GRANDFATHERED[] = { 78/* grandfathered preferred */ 79 "art-lojban", "jbo", 80 "cel-gaulish", "xtg-x-cel-gaulish", 81 "en-GB-oed", "en-GB-x-oed", 82 "i-ami", "ami", 83 "i-bnn", "bnn", 84 "i-default", "en-x-i-default", 85 "i-enochian", "und-x-i-enochian", 86 "i-hak", "hak", 87 "i-klingon", "tlh", 88 "i-lux", "lb", 89 "i-mingo", "see-x-i-mingo", 90 "i-navajo", "nv", 91 "i-pwn", "pwn", 92 "i-tao", "tao", 93 "i-tay", "tay", 94 "i-tsu", "tsu", 95 "no-bok", "nb", 96 "no-nyn", "nn", 97 "sgn-be-fr", "sfb", 98 "sgn-be-nl", "vgt", 99 "sgn-ch-de", "sgg", 100 "zh-guoyu", "cmn", 101 "zh-hakka", "hak", 102 "zh-min", "nan-x-zh-min", 103 "zh-min-nan", "nan", 104 "zh-xiang", "hsn", 105 NULL, NULL 106}; 107 108static const char DEPRECATEDLANGS[][4] = { 109/* deprecated new */ 110 "iw", "he", 111 "ji", "yi", 112 "in", "id" 113}; 114 115/* 116* ------------------------------------------------- 117* 118* These ultag_ functions may be exposed as APIs later 119* 120* ------------------------------------------------- 121*/ 122 123static ULanguageTag* 124ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); 125 126static void 127ultag_close(ULanguageTag* langtag); 128 129static const char* 130ultag_getLanguage(const ULanguageTag* langtag); 131 132#if 0 133static const char* 134ultag_getJDKLanguage(const ULanguageTag* langtag); 135#endif 136 137static const char* 138ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); 139 140static int32_t 141ultag_getExtlangSize(const ULanguageTag* langtag); 142 143static const char* 144ultag_getScript(const ULanguageTag* langtag); 145 146static const char* 147ultag_getRegion(const ULanguageTag* langtag); 148 149static const char* 150ultag_getVariant(const ULanguageTag* langtag, int32_t idx); 151 152static int32_t 153ultag_getVariantsSize(const ULanguageTag* langtag); 154 155static const char* 156ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); 157 158static const char* 159ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); 160 161static int32_t 162ultag_getExtensionsSize(const ULanguageTag* langtag); 163 164static const char* 165ultag_getPrivateUse(const ULanguageTag* langtag); 166 167#if 0 168static const char* 169ultag_getGrandfathered(const ULanguageTag* langtag); 170#endif 171 172/* 173* ------------------------------------------------- 174* 175* Language subtag syntax validation functions 176* 177* ------------------------------------------------- 178*/ 179 180static UBool 181_isAlphaString(const char* s, int32_t len) { 182 int32_t i; 183 for (i = 0; i < len; i++) { 184 if (!ISALPHA(*(s + i))) { 185 return FALSE; 186 } 187 } 188 return TRUE; 189} 190 191static UBool 192_isNumericString(const char* s, int32_t len) { 193 int32_t i; 194 for (i = 0; i < len; i++) { 195 if (!ISNUMERIC(*(s + i))) { 196 return FALSE; 197 } 198 } 199 return TRUE; 200} 201 202static UBool 203_isAlphaNumericString(const char* s, int32_t len) { 204 int32_t i; 205 for (i = 0; i < len; i++) { 206 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { 207 return FALSE; 208 } 209 } 210 return TRUE; 211} 212 213static UBool 214_isLanguageSubtag(const char* s, int32_t len) { 215 /* 216 * language = 2*3ALPHA ; shortest ISO 639 code 217 * ["-" extlang] ; sometimes followed by 218 * ; extended language subtags 219 * / 4ALPHA ; or reserved for future use 220 * / 5*8ALPHA ; or registered language subtag 221 */ 222 if (len < 0) { 223 len = (int32_t)uprv_strlen(s); 224 } 225 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { 226 return TRUE; 227 } 228 return FALSE; 229} 230 231static UBool 232_isExtlangSubtag(const char* s, int32_t len) { 233 /* 234 * extlang = 3ALPHA ; selected ISO 639 codes 235 * *2("-" 3ALPHA) ; permanently reserved 236 */ 237 if (len < 0) { 238 len = (int32_t)uprv_strlen(s); 239 } 240 if (len == 3 && _isAlphaString(s, len)) { 241 return TRUE; 242 } 243 return FALSE; 244} 245 246static UBool 247_isScriptSubtag(const char* s, int32_t len) { 248 /* 249 * script = 4ALPHA ; ISO 15924 code 250 */ 251 if (len < 0) { 252 len = (int32_t)uprv_strlen(s); 253 } 254 if (len == 4 && _isAlphaString(s, len)) { 255 return TRUE; 256 } 257 return FALSE; 258} 259 260static UBool 261_isRegionSubtag(const char* s, int32_t len) { 262 /* 263 * region = 2ALPHA ; ISO 3166-1 code 264 * / 3DIGIT ; UN M.49 code 265 */ 266 if (len < 0) { 267 len = (int32_t)uprv_strlen(s); 268 } 269 if (len == 2 && _isAlphaString(s, len)) { 270 return TRUE; 271 } 272 if (len == 3 && _isNumericString(s, len)) { 273 return TRUE; 274 } 275 return FALSE; 276} 277 278static UBool 279_isVariantSubtag(const char* s, int32_t len) { 280 /* 281 * variant = 5*8alphanum ; registered variants 282 * / (DIGIT 3alphanum) 283 */ 284 if (len < 0) { 285 len = (int32_t)uprv_strlen(s); 286 } 287 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { 288 return TRUE; 289 } 290 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { 291 return TRUE; 292 } 293 return FALSE; 294} 295 296static UBool 297_isPrivateuseVariantSubtag(const char* s, int32_t len) { 298 /* 299 * variant = 1*8alphanum ; registered variants 300 * / (DIGIT 3alphanum) 301 */ 302 if (len < 0) { 303 len = (int32_t)uprv_strlen(s); 304 } 305 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 306 return TRUE; 307 } 308 return FALSE; 309} 310 311static UBool 312_isExtensionSingleton(const char* s, int32_t len) { 313 /* 314 * extension = singleton 1*("-" (2*8alphanum)) 315 */ 316 if (len < 0) { 317 len = (int32_t)uprv_strlen(s); 318 } 319 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { 320 return TRUE; 321 } 322 return FALSE; 323} 324 325static UBool 326_isExtensionSubtag(const char* s, int32_t len) { 327 /* 328 * extension = singleton 1*("-" (2*8alphanum)) 329 */ 330 if (len < 0) { 331 len = (int32_t)uprv_strlen(s); 332 } 333 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { 334 return TRUE; 335 } 336 return FALSE; 337} 338 339static UBool 340_isExtensionSubtags(const char* s, int32_t len) { 341 const char *p = s; 342 const char *pSubtag = NULL; 343 344 if (len < 0) { 345 len = (int32_t)uprv_strlen(s); 346 } 347 348 while ((p - s) < len) { 349 if (*p == SEP) { 350 if (pSubtag == NULL) { 351 return FALSE; 352 } 353 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { 354 return FALSE; 355 } 356 pSubtag = NULL; 357 } else if (pSubtag == NULL) { 358 pSubtag = p; 359 } 360 p++; 361 } 362 if (pSubtag == NULL) { 363 return FALSE; 364 } 365 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); 366} 367 368static UBool 369_isPrivateuseValueSubtag(const char* s, int32_t len) { 370 /* 371 * privateuse = "x" 1*("-" (1*8alphanum)) 372 */ 373 if (len < 0) { 374 len = (int32_t)uprv_strlen(s); 375 } 376 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 377 return TRUE; 378 } 379 return FALSE; 380} 381 382static UBool 383_isPrivateuseValueSubtags(const char* s, int32_t len) { 384 const char *p = s; 385 const char *pSubtag = NULL; 386 387 if (len < 0) { 388 len = (int32_t)uprv_strlen(s); 389 } 390 391 while ((p - s) < len) { 392 if (*p == SEP) { 393 if (pSubtag == NULL) { 394 return FALSE; 395 } 396 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { 397 return FALSE; 398 } 399 pSubtag = NULL; 400 } else if (pSubtag == NULL) { 401 pSubtag = p; 402 } 403 p++; 404 } 405 if (pSubtag == NULL) { 406 return FALSE; 407 } 408 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); 409} 410 411U_CFUNC UBool 412ultag_isUnicodeLocaleKey(const char* s, int32_t len) { 413 if (len < 0) { 414 len = (int32_t)uprv_strlen(s); 415 } 416 if (len == 2 && _isAlphaNumericString(s, len)) { 417 return TRUE; 418 } 419 return FALSE; 420} 421 422U_CFUNC UBool 423ultag_isUnicodeLocaleType(const char*s, int32_t len) { 424 const char* p; 425 int32_t subtagLen = 0; 426 427 if (len < 0) { 428 len = (int32_t)uprv_strlen(s); 429 } 430 431 for (p = s; len > 0; p++, len--) { 432 if (*p == SEP) { 433 if (subtagLen < 3) { 434 return FALSE; 435 } 436 subtagLen = 0; 437 } else if (ISALPHA(*p) || ISNUMERIC(*p)) { 438 subtagLen++; 439 if (subtagLen > 8) { 440 return FALSE; 441 } 442 } else { 443 return FALSE; 444 } 445 } 446 447 return (subtagLen >= 3); 448} 449/* 450* ------------------------------------------------- 451* 452* Helper functions 453* 454* ------------------------------------------------- 455*/ 456 457static UBool 458_addVariantToList(VariantListEntry **first, VariantListEntry *var) { 459 UBool bAdded = TRUE; 460 461 if (*first == NULL) { 462 var->next = NULL; 463 *first = var; 464 } else { 465 VariantListEntry *prev, *cur; 466 int32_t cmp; 467 468 /* variants order should be preserved */ 469 prev = NULL; 470 cur = *first; 471 while (TRUE) { 472 if (cur == NULL) { 473 prev->next = var; 474 var->next = NULL; 475 break; 476 } 477 478 /* Checking for duplicate variant */ 479 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); 480 if (cmp == 0) { 481 /* duplicated variant */ 482 bAdded = FALSE; 483 break; 484 } 485 prev = cur; 486 cur = cur->next; 487 } 488 } 489 490 return bAdded; 491} 492 493static UBool 494_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { 495 UBool bAdded = TRUE; 496 497 if (*first == NULL) { 498 attr->next = NULL; 499 *first = attr; 500 } else { 501 AttributeListEntry *prev, *cur; 502 int32_t cmp; 503 504 /* reorder variants in alphabetical order */ 505 prev = NULL; 506 cur = *first; 507 while (TRUE) { 508 if (cur == NULL) { 509 prev->next = attr; 510 attr->next = NULL; 511 break; 512 } 513 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); 514 if (cmp < 0) { 515 if (prev == NULL) { 516 *first = attr; 517 } else { 518 prev->next = attr; 519 } 520 attr->next = cur; 521 break; 522 } 523 if (cmp == 0) { 524 /* duplicated variant */ 525 bAdded = FALSE; 526 break; 527 } 528 prev = cur; 529 cur = cur->next; 530 } 531 } 532 533 return bAdded; 534} 535 536 537static UBool 538_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { 539 UBool bAdded = TRUE; 540 541 if (*first == NULL) { 542 ext->next = NULL; 543 *first = ext; 544 } else { 545 ExtensionListEntry *prev, *cur; 546 int32_t cmp; 547 548 /* reorder variants in alphabetical order */ 549 prev = NULL; 550 cur = *first; 551 while (TRUE) { 552 if (cur == NULL) { 553 prev->next = ext; 554 ext->next = NULL; 555 break; 556 } 557 if (localeToBCP) { 558 /* special handling for locale to bcp conversion */ 559 int32_t len, curlen; 560 561 len = (int32_t)uprv_strlen(ext->key); 562 curlen = (int32_t)uprv_strlen(cur->key); 563 564 if (len == 1 && curlen == 1) { 565 if (*(ext->key) == *(cur->key)) { 566 cmp = 0; 567 } else if (*(ext->key) == PRIVATEUSE) { 568 cmp = 1; 569 } else if (*(cur->key) == PRIVATEUSE) { 570 cmp = -1; 571 } else { 572 cmp = *(ext->key) - *(cur->key); 573 } 574 } else if (len == 1) { 575 cmp = *(ext->key) - LDMLEXT; 576 } else if (curlen == 1) { 577 cmp = LDMLEXT - *(cur->key); 578 } else { 579 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 580 } 581 } else { 582 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 583 } 584 if (cmp < 0) { 585 if (prev == NULL) { 586 *first = ext; 587 } else { 588 prev->next = ext; 589 } 590 ext->next = cur; 591 break; 592 } 593 if (cmp == 0) { 594 /* duplicated extension key */ 595 bAdded = FALSE; 596 break; 597 } 598 prev = cur; 599 cur = cur->next; 600 } 601 } 602 603 return bAdded; 604} 605 606static void 607_initializeULanguageTag(ULanguageTag* langtag) { 608 int32_t i; 609 610 langtag->buf = NULL; 611 612 langtag->language = EMPTY; 613 for (i = 0; i < MAXEXTLANG; i++) { 614 langtag->extlang[i] = NULL; 615 } 616 617 langtag->script = EMPTY; 618 langtag->region = EMPTY; 619 620 langtag->variants = NULL; 621 langtag->extensions = NULL; 622 623 langtag->grandfathered = EMPTY; 624 langtag->privateuse = EMPTY; 625} 626 627static int32_t 628_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 629 char buf[ULOC_LANG_CAPACITY]; 630 UErrorCode tmpStatus = U_ZERO_ERROR; 631 int32_t len, i; 632 int32_t reslen = 0; 633 634 if (U_FAILURE(*status)) { 635 return 0; 636 } 637 638 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); 639 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 640 if (strict) { 641 *status = U_ILLEGAL_ARGUMENT_ERROR; 642 return 0; 643 } 644 len = 0; 645 } 646 647 /* Note: returned language code is in lower case letters */ 648 649 if (len == 0) { 650 if (reslen < capacity) { 651 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 652 } 653 reslen += LANG_UND_LEN; 654 } else if (!_isLanguageSubtag(buf, len)) { 655 /* invalid language code */ 656 if (strict) { 657 *status = U_ILLEGAL_ARGUMENT_ERROR; 658 return 0; 659 } 660 if (reslen < capacity) { 661 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 662 } 663 reslen += LANG_UND_LEN; 664 } else { 665 /* resolve deprecated */ 666 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { 667 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { 668 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); 669 len = (int32_t)uprv_strlen(buf); 670 break; 671 } 672 } 673 if (reslen < capacity) { 674 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 675 } 676 reslen += len; 677 } 678 u_terminateChars(appendAt, capacity, reslen, status); 679 return reslen; 680} 681 682static int32_t 683_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 684 char buf[ULOC_SCRIPT_CAPACITY]; 685 UErrorCode tmpStatus = U_ZERO_ERROR; 686 int32_t len; 687 int32_t reslen = 0; 688 689 if (U_FAILURE(*status)) { 690 return 0; 691 } 692 693 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); 694 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 695 if (strict) { 696 *status = U_ILLEGAL_ARGUMENT_ERROR; 697 } 698 return 0; 699 } 700 701 if (len > 0) { 702 if (!_isScriptSubtag(buf, len)) { 703 /* invalid script code */ 704 if (strict) { 705 *status = U_ILLEGAL_ARGUMENT_ERROR; 706 } 707 return 0; 708 } else { 709 if (reslen < capacity) { 710 *(appendAt + reslen) = SEP; 711 } 712 reslen++; 713 714 if (reslen < capacity) { 715 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 716 } 717 reslen += len; 718 } 719 } 720 u_terminateChars(appendAt, capacity, reslen, status); 721 return reslen; 722} 723 724static int32_t 725_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 726 char buf[ULOC_COUNTRY_CAPACITY]; 727 UErrorCode tmpStatus = U_ZERO_ERROR; 728 int32_t len; 729 int32_t reslen = 0; 730 731 if (U_FAILURE(*status)) { 732 return 0; 733 } 734 735 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); 736 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 737 if (strict) { 738 *status = U_ILLEGAL_ARGUMENT_ERROR; 739 } 740 return 0; 741 } 742 743 if (len > 0) { 744 if (!_isRegionSubtag(buf, len)) { 745 /* invalid region code */ 746 if (strict) { 747 *status = U_ILLEGAL_ARGUMENT_ERROR; 748 } 749 return 0; 750 } else { 751 if (reslen < capacity) { 752 *(appendAt + reslen) = SEP; 753 } 754 reslen++; 755 756 if (reslen < capacity) { 757 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 758 } 759 reslen += len; 760 } 761 } 762 u_terminateChars(appendAt, capacity, reslen, status); 763 return reslen; 764} 765 766static int32_t 767_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { 768 char buf[ULOC_FULLNAME_CAPACITY]; 769 UErrorCode tmpStatus = U_ZERO_ERROR; 770 int32_t len, i; 771 int32_t reslen = 0; 772 773 if (U_FAILURE(*status)) { 774 return 0; 775 } 776 777 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 778 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 779 if (strict) { 780 *status = U_ILLEGAL_ARGUMENT_ERROR; 781 } 782 return 0; 783 } 784 785 if (len > 0) { 786 char *p, *pVar; 787 UBool bNext = TRUE; 788 VariantListEntry *var; 789 VariantListEntry *varFirst = NULL; 790 791 pVar = NULL; 792 p = buf; 793 while (bNext) { 794 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 795 if (*p == 0) { 796 bNext = FALSE; 797 } else { 798 *p = 0; /* terminate */ 799 } 800 if (pVar == NULL) { 801 if (strict) { 802 *status = U_ILLEGAL_ARGUMENT_ERROR; 803 break; 804 } 805 /* ignore empty variant */ 806 } else { 807 /* ICU uses upper case letters for variants, but 808 the canonical format is lowercase in BCP47 */ 809 for (i = 0; *(pVar + i) != 0; i++) { 810 *(pVar + i) = uprv_tolower(*(pVar + i)); 811 } 812 813 /* validate */ 814 if (_isVariantSubtag(pVar, -1)) { 815 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) { 816 /* emit the variant to the list */ 817 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); 818 if (var == NULL) { 819 *status = U_MEMORY_ALLOCATION_ERROR; 820 break; 821 } 822 var->variant = pVar; 823 if (!_addVariantToList(&varFirst, var)) { 824 /* duplicated variant */ 825 uprv_free(var); 826 if (strict) { 827 *status = U_ILLEGAL_ARGUMENT_ERROR; 828 break; 829 } 830 } 831 } else { 832 /* Special handling for POSIX variant, need to remember that we had it and then */ 833 /* treat it like an extension later. */ 834 *hadPosix = TRUE; 835 } 836 } else if (strict) { 837 *status = U_ILLEGAL_ARGUMENT_ERROR; 838 break; 839 } else if (_isPrivateuseValueSubtag(pVar, -1)) { 840 /* Handle private use subtags separately */ 841 break; 842 } 843 } 844 /* reset variant starting position */ 845 pVar = NULL; 846 } else if (pVar == NULL) { 847 pVar = p; 848 } 849 p++; 850 } 851 852 if (U_SUCCESS(*status)) { 853 if (varFirst != NULL) { 854 int32_t varLen; 855 856 /* write out validated/normalized variants to the target */ 857 var = varFirst; 858 while (var != NULL) { 859 if (reslen < capacity) { 860 *(appendAt + reslen) = SEP; 861 } 862 reslen++; 863 varLen = (int32_t)uprv_strlen(var->variant); 864 if (reslen < capacity) { 865 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); 866 } 867 reslen += varLen; 868 var = var->next; 869 } 870 } 871 } 872 873 /* clean up */ 874 var = varFirst; 875 while (var != NULL) { 876 VariantListEntry *tmpVar = var->next; 877 uprv_free(var); 878 var = tmpVar; 879 } 880 881 if (U_FAILURE(*status)) { 882 return 0; 883 } 884 } 885 886 u_terminateChars(appendAt, capacity, reslen, status); 887 return reslen; 888} 889 890static int32_t 891_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { 892 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 893 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; 894 int32_t attrBufLength = 0; 895 UBool isAttribute = FALSE; 896 UEnumeration *keywordEnum = NULL; 897 int32_t reslen = 0; 898 899 keywordEnum = uloc_openKeywords(localeID, status); 900 if (U_FAILURE(*status) && !hadPosix) { 901 uenum_close(keywordEnum); 902 return 0; 903 } 904 if (keywordEnum != NULL || hadPosix) { 905 /* reorder extensions */ 906 int32_t len; 907 const char *key; 908 ExtensionListEntry *firstExt = NULL; 909 ExtensionListEntry *ext; 910 AttributeListEntry *firstAttr = NULL; 911 AttributeListEntry *attr; 912 char *attrValue; 913 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 914 char *pExtBuf = extBuf; 915 int32_t extBufCapacity = sizeof(extBuf); 916 const char *bcpKey, *bcpValue; 917 UErrorCode tmpStatus = U_ZERO_ERROR; 918 int32_t keylen; 919 UBool isBcpUExt; 920 921 while (TRUE) { 922 isAttribute = FALSE; 923 key = uenum_next(keywordEnum, NULL, status); 924 if (key == NULL) { 925 break; 926 } 927 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); 928 /* buf must be null-terminated */ 929 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 930 if (strict) { 931 *status = U_ILLEGAL_ARGUMENT_ERROR; 932 break; 933 } 934 /* ignore this keyword */ 935 tmpStatus = U_ZERO_ERROR; 936 continue; 937 } 938 939 keylen = (int32_t)uprv_strlen(key); 940 isBcpUExt = (keylen > 1); 941 942 /* special keyword used for representing Unicode locale attributes */ 943 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { 944 isAttribute = TRUE; 945 if (len > 0) { 946 int32_t i = 0; 947 while (TRUE) { 948 attrBufLength = 0; 949 for (; i < len; i++) { 950 if (buf[i] != '-') { 951 attrBuf[attrBufLength++] = buf[i]; 952 } else { 953 i++; 954 break; 955 } 956 } 957 if (attrBufLength > 0) { 958 attrBuf[attrBufLength] = 0; 959 960 } else if (i >= len){ 961 break; 962 } 963 964 /* create AttributeListEntry */ 965 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); 966 if (attr == NULL) { 967 *status = U_MEMORY_ALLOCATION_ERROR; 968 break; 969 } 970 attrValue = (char*)uprv_malloc(attrBufLength + 1); 971 if (attrValue == NULL) { 972 *status = U_MEMORY_ALLOCATION_ERROR; 973 break; 974 } 975 uprv_strcpy(attrValue, attrBuf); 976 attr->attribute = attrValue; 977 978 if (!_addAttributeToList(&firstAttr, attr)) { 979 uprv_free(attr); 980 uprv_free(attrValue); 981 if (strict) { 982 *status = U_ILLEGAL_ARGUMENT_ERROR; 983 break; 984 } 985 } 986 } 987 } 988 } else if (isBcpUExt) { 989 bcpKey = uloc_toUnicodeLocaleKey(key); 990 if (bcpKey == NULL) { 991 if (strict) { 992 *status = U_ILLEGAL_ARGUMENT_ERROR; 993 break; 994 } 995 continue; 996 } 997 998 /* we've checked buf is null-terminated above */ 999 bcpValue = uloc_toUnicodeLocaleType(key, buf); 1000 if (bcpValue == NULL) { 1001 if (strict) { 1002 *status = U_ILLEGAL_ARGUMENT_ERROR; 1003 break; 1004 } 1005 continue; 1006 } 1007 if (bcpValue == buf) { 1008 /* 1009 When uloc_toUnicodeLocaleType(key, buf) returns the 1010 input value as is, the value is well-formed, but has 1011 no known mapping. This implementation normalizes the 1012 the value to lower case 1013 */ 1014 int32_t bcpValueLen = uprv_strlen(bcpValue); 1015 if (bcpValueLen < extBufCapacity) { 1016 uprv_strcpy(pExtBuf, bcpValue); 1017 T_CString_toLowerCase(pExtBuf); 1018 1019 bcpValue = pExtBuf; 1020 1021 pExtBuf += (bcpValueLen + 1); 1022 extBufCapacity -= (bcpValueLen + 1); 1023 } else { 1024 if (strict) { 1025 *status = U_ILLEGAL_ARGUMENT_ERROR; 1026 break; 1027 } 1028 continue; 1029 } 1030 } 1031 } else { 1032 if (*key == PRIVATEUSE) { 1033 if (!_isPrivateuseValueSubtags(buf, len)) { 1034 if (strict) { 1035 *status = U_ILLEGAL_ARGUMENT_ERROR; 1036 break; 1037 } 1038 continue; 1039 } 1040 } else { 1041 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { 1042 if (strict) { 1043 *status = U_ILLEGAL_ARGUMENT_ERROR; 1044 break; 1045 } 1046 continue; 1047 } 1048 } 1049 bcpKey = key; 1050 if ((len + 1) < extBufCapacity) { 1051 uprv_memcpy(pExtBuf, buf, len); 1052 bcpValue = pExtBuf; 1053 1054 pExtBuf += len; 1055 1056 *pExtBuf = 0; 1057 pExtBuf++; 1058 1059 extBufCapacity -= (len + 1); 1060 } else { 1061 *status = U_ILLEGAL_ARGUMENT_ERROR; 1062 break; 1063 } 1064 } 1065 1066 if (!isAttribute) { 1067 /* create ExtensionListEntry */ 1068 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1069 if (ext == NULL) { 1070 *status = U_MEMORY_ALLOCATION_ERROR; 1071 break; 1072 } 1073 ext->key = bcpKey; 1074 ext->value = bcpValue; 1075 1076 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1077 uprv_free(ext); 1078 if (strict) { 1079 *status = U_ILLEGAL_ARGUMENT_ERROR; 1080 break; 1081 } 1082 } 1083 } 1084 } 1085 1086 /* Special handling for POSIX variant - add the keywords for POSIX */ 1087 if (hadPosix) { 1088 /* create ExtensionListEntry for POSIX */ 1089 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1090 if (ext == NULL) { 1091 *status = U_MEMORY_ALLOCATION_ERROR; 1092 goto cleanup; 1093 } 1094 ext->key = POSIX_KEY; 1095 ext->value = POSIX_VALUE; 1096 1097 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1098 uprv_free(ext); 1099 } 1100 } 1101 1102 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { 1103 UBool startLDMLExtension = FALSE; 1104 1105 attr = firstAttr; 1106 ext = firstExt; 1107 do { 1108 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) { 1109 /* write LDML singleton extension */ 1110 if (reslen < capacity) { 1111 *(appendAt + reslen) = SEP; 1112 } 1113 reslen++; 1114 if (reslen < capacity) { 1115 *(appendAt + reslen) = LDMLEXT; 1116 } 1117 reslen++; 1118 1119 startLDMLExtension = TRUE; 1120 } 1121 1122 /* write out the sorted BCP47 attributes, extensions and private use */ 1123 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) { 1124 if (reslen < capacity) { 1125 *(appendAt + reslen) = SEP; 1126 } 1127 reslen++; 1128 len = (int32_t)uprv_strlen(ext->key); 1129 if (reslen < capacity) { 1130 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); 1131 } 1132 reslen += len; 1133 if (reslen < capacity) { 1134 *(appendAt + reslen) = SEP; 1135 } 1136 reslen++; 1137 len = (int32_t)uprv_strlen(ext->value); 1138 if (reslen < capacity) { 1139 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); 1140 } 1141 reslen += len; 1142 1143 ext = ext->next; 1144 } else if (attr) { 1145 /* write the value for the attributes */ 1146 if (reslen < capacity) { 1147 *(appendAt + reslen) = SEP; 1148 } 1149 reslen++; 1150 len = (int32_t)uprv_strlen(attr->attribute); 1151 if (reslen < capacity) { 1152 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); 1153 } 1154 reslen += len; 1155 1156 attr = attr->next; 1157 } 1158 } while (attr != NULL || ext != NULL); 1159 } 1160cleanup: 1161 /* clean up */ 1162 ext = firstExt; 1163 while (ext != NULL) { 1164 ExtensionListEntry *tmpExt = ext->next; 1165 uprv_free(ext); 1166 ext = tmpExt; 1167 } 1168 1169 attr = firstAttr; 1170 while (attr != NULL) { 1171 AttributeListEntry *tmpAttr = attr->next; 1172 char *pValue = (char *)attr->attribute; 1173 uprv_free(pValue); 1174 uprv_free(attr); 1175 attr = tmpAttr; 1176 } 1177 1178 uenum_close(keywordEnum); 1179 1180 if (U_FAILURE(*status)) { 1181 return 0; 1182 } 1183 } 1184 1185 return u_terminateChars(appendAt, capacity, reslen, status); 1186} 1187 1188/** 1189 * Append keywords parsed from LDML extension value 1190 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} 1191 * Note: char* buf is used for storing keywords 1192 */ 1193static void 1194_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { 1195 const char *pTag; /* beginning of current subtag */ 1196 const char *pKwds; /* beginning of key-type pairs */ 1197 UBool variantExists = *posixVariant; 1198 1199 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ 1200 ExtensionListEntry *kwd, *nextKwd; 1201 1202 AttributeListEntry *attrFirst = NULL; /* first attribute */ 1203 AttributeListEntry *attr, *nextAttr; 1204 1205 int32_t len; 1206 int32_t bufIdx = 0; 1207 1208 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1209 int32_t attrBufIdx = 0; 1210 1211 /* Reset the posixVariant value */ 1212 *posixVariant = FALSE; 1213 1214 pTag = ldmlext; 1215 pKwds = NULL; 1216 1217 /* Iterate through u extension attributes */ 1218 while (*pTag) { 1219 /* locate next separator char */ 1220 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); 1221 1222 if (ultag_isUnicodeLocaleKey(pTag, len)) { 1223 pKwds = pTag; 1224 break; 1225 } 1226 1227 /* add this attribute to the list */ 1228 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); 1229 if (attr == NULL) { 1230 *status = U_MEMORY_ALLOCATION_ERROR; 1231 goto cleanup; 1232 } 1233 1234 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { 1235 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); 1236 attrBuf[attrBufIdx + len] = 0; 1237 attr->attribute = &attrBuf[attrBufIdx]; 1238 attrBufIdx += (len + 1); 1239 } else { 1240 *status = U_ILLEGAL_ARGUMENT_ERROR; 1241 goto cleanup; 1242 } 1243 1244 if (!_addAttributeToList(&attrFirst, attr)) { 1245 *status = U_ILLEGAL_ARGUMENT_ERROR; 1246 uprv_free(attr); 1247 goto cleanup; 1248 } 1249 1250 /* next tag */ 1251 pTag += len; 1252 if (*pTag) { 1253 /* next to the separator */ 1254 pTag++; 1255 } 1256 } 1257 1258 if (attrFirst) { 1259 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ 1260 1261 if (attrBufIdx > bufSize) { 1262 /* attrBufIdx == <total length of attribute subtag> + 1 */ 1263 *status = U_ILLEGAL_ARGUMENT_ERROR; 1264 goto cleanup; 1265 } 1266 1267 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1268 if (kwd == NULL) { 1269 *status = U_MEMORY_ALLOCATION_ERROR; 1270 goto cleanup; 1271 } 1272 1273 kwd->key = LOCALE_ATTRIBUTE_KEY; 1274 kwd->value = buf; 1275 1276 /* attribute subtags sorted in alphabetical order as type */ 1277 attr = attrFirst; 1278 while (attr != NULL) { 1279 nextAttr = attr->next; 1280 1281 /* buffer size check is done above */ 1282 if (attr != attrFirst) { 1283 *(buf + bufIdx) = SEP; 1284 bufIdx++; 1285 } 1286 1287 len = uprv_strlen(attr->attribute); 1288 uprv_memcpy(buf + bufIdx, attr->attribute, len); 1289 bufIdx += len; 1290 1291 attr = nextAttr; 1292 } 1293 *(buf + bufIdx) = 0; 1294 bufIdx++; 1295 1296 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1297 *status = U_ILLEGAL_ARGUMENT_ERROR; 1298 uprv_free(kwd); 1299 goto cleanup; 1300 } 1301 1302 /* once keyword entry is created, delete the attribute list */ 1303 attr = attrFirst; 1304 while (attr != NULL) { 1305 nextAttr = attr->next; 1306 uprv_free(attr); 1307 attr = nextAttr; 1308 } 1309 attrFirst = NULL; 1310 } 1311 1312 if (pKwds) { 1313 const char *pBcpKey = NULL; /* u extenstion key subtag */ 1314 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ 1315 int32_t bcpKeyLen = 0; 1316 int32_t bcpTypeLen = 0; 1317 UBool isDone = FALSE; 1318 1319 pTag = pKwds; 1320 /* BCP47 representation of LDML key/type pairs */ 1321 while (!isDone) { 1322 const char *pNextBcpKey = NULL; 1323 int32_t nextBcpKeyLen = 0; 1324 UBool emitKeyword = FALSE; 1325 1326 if (*pTag) { 1327 /* locate next separator char */ 1328 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); 1329 1330 if (ultag_isUnicodeLocaleKey(pTag, len)) { 1331 if (pBcpKey) { 1332 emitKeyword = TRUE; 1333 pNextBcpKey = pTag; 1334 nextBcpKeyLen = len; 1335 } else { 1336 pBcpKey = pTag; 1337 bcpKeyLen = len; 1338 } 1339 } else { 1340 U_ASSERT(pBcpKey != NULL); 1341 /* within LDML type subtags */ 1342 if (pBcpType) { 1343 bcpTypeLen += (len + 1); 1344 } else { 1345 pBcpType = pTag; 1346 bcpTypeLen = len; 1347 } 1348 } 1349 1350 /* next tag */ 1351 pTag += len; 1352 if (*pTag) { 1353 /* next to the separator */ 1354 pTag++; 1355 } 1356 } else { 1357 /* processing last one */ 1358 emitKeyword = TRUE; 1359 isDone = TRUE; 1360 } 1361 1362 if (emitKeyword) { 1363 const char *pKey = NULL; /* LDML key */ 1364 const char *pType = NULL; /* LDML type */ 1365 1366 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ 1367 1368 U_ASSERT(pBcpKey != NULL); 1369 1370 if (bcpKeyLen >= sizeof(bcpKeyBuf)) { 1371 /* the BCP key is invalid */ 1372 *status = U_ILLEGAL_ARGUMENT_ERROR; 1373 goto cleanup; 1374 } 1375 1376 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); 1377 bcpKeyBuf[bcpKeyLen] = 0; 1378 1379 /* u extension key to LDML key */ 1380 pKey = uloc_toLegacyKey(bcpKeyBuf); 1381 if (pKey == NULL) { 1382 *status = U_ILLEGAL_ARGUMENT_ERROR; 1383 goto cleanup; 1384 } 1385 if (pKey == bcpKeyBuf) { 1386 /* 1387 The key returned by toLegacyKey points to the input buffer. 1388 We normalize the result key to lower case. 1389 */ 1390 T_CString_toLowerCase(bcpKeyBuf); 1391 if (bufSize - bufIdx - 1 >= bcpKeyLen) { 1392 uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); 1393 pKey = buf + bufIdx; 1394 bufIdx += bcpKeyLen; 1395 *(buf + bufIdx) = 0; 1396 bufIdx++; 1397 } else { 1398 *status = U_BUFFER_OVERFLOW_ERROR; 1399 goto cleanup; 1400 } 1401 } 1402 1403 if (pBcpType) { 1404 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ 1405 if (bcpTypeLen >= sizeof(bcpTypeBuf)) { 1406 /* the BCP type is too long */ 1407 *status = U_ILLEGAL_ARGUMENT_ERROR; 1408 goto cleanup; 1409 } 1410 1411 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); 1412 bcpTypeBuf[bcpTypeLen] = 0; 1413 1414 /* BCP type to locale type */ 1415 pType = uloc_toLegacyType(pKey, bcpTypeBuf); 1416 if (pType == NULL) { 1417 *status = U_ILLEGAL_ARGUMENT_ERROR; 1418 goto cleanup; 1419 } 1420 if (pType == bcpTypeBuf) { 1421 /* 1422 The type returned by toLegacyType points to the input buffer. 1423 We normalize the result type to lower case. 1424 */ 1425 /* normalize to lower case */ 1426 T_CString_toLowerCase(bcpTypeBuf); 1427 if (bufSize - bufIdx - 1 >= bcpTypeLen) { 1428 uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); 1429 pType = buf + bufIdx; 1430 bufIdx += bcpTypeLen; 1431 *(buf + bufIdx) = 0; 1432 bufIdx++; 1433 } else { 1434 *status = U_BUFFER_OVERFLOW_ERROR; 1435 goto cleanup; 1436 } 1437 } 1438 } else { 1439 /* typeless - default type value is "yes" */ 1440 pType = LOCALE_TYPE_YES; 1441 } 1442 1443 /* Special handling for u-va-posix, since we want to treat this as a variant, 1444 not as a keyword */ 1445 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { 1446 *posixVariant = TRUE; 1447 } else { 1448 /* create an ExtensionListEntry for this keyword */ 1449 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1450 if (kwd == NULL) { 1451 *status = U_MEMORY_ALLOCATION_ERROR; 1452 goto cleanup; 1453 } 1454 1455 kwd->key = pKey; 1456 kwd->value = pType; 1457 1458 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1459 *status = U_ILLEGAL_ARGUMENT_ERROR; 1460 uprv_free(kwd); 1461 goto cleanup; 1462 } 1463 } 1464 1465 pBcpKey = pNextBcpKey; 1466 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; 1467 pBcpType = NULL; 1468 bcpTypeLen = 0; 1469 } 1470 } 1471 } 1472 1473 kwd = kwdFirst; 1474 while (kwd != NULL) { 1475 nextKwd = kwd->next; 1476 _addExtensionToList(appendTo, kwd, FALSE); 1477 kwd = nextKwd; 1478 } 1479 1480 return; 1481 1482cleanup: 1483 attr = attrFirst; 1484 while (attr != NULL) { 1485 nextAttr = attr->next; 1486 uprv_free(attr); 1487 attr = nextAttr; 1488 } 1489 1490 kwd = kwdFirst; 1491 while (kwd != NULL) { 1492 nextKwd = kwd->next; 1493 uprv_free(kwd); 1494 kwd = nextKwd; 1495 } 1496} 1497 1498 1499static int32_t 1500_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { 1501 int32_t reslen = 0; 1502 int32_t i, n; 1503 int32_t len; 1504 ExtensionListEntry *kwdFirst = NULL; 1505 ExtensionListEntry *kwd; 1506 const char *key, *type; 1507 char *kwdBuf = NULL; 1508 int32_t kwdBufLength = capacity; 1509 UBool posixVariant = FALSE; 1510 1511 if (U_FAILURE(*status)) { 1512 return 0; 1513 } 1514 1515 kwdBuf = (char*)uprv_malloc(kwdBufLength); 1516 if (kwdBuf == NULL) { 1517 *status = U_MEMORY_ALLOCATION_ERROR; 1518 return 0; 1519 } 1520 1521 /* Determine if variants already exists */ 1522 if (ultag_getVariantsSize(langtag)) { 1523 posixVariant = TRUE; 1524 } 1525 1526 n = ultag_getExtensionsSize(langtag); 1527 1528 /* resolve locale keywords and reordering keys */ 1529 for (i = 0; i < n; i++) { 1530 key = ultag_getExtensionKey(langtag, i); 1531 type = ultag_getExtensionValue(langtag, i); 1532 if (*key == LDMLEXT) { 1533 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); 1534 if (U_FAILURE(*status)) { 1535 break; 1536 } 1537 } else { 1538 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1539 if (kwd == NULL) { 1540 *status = U_MEMORY_ALLOCATION_ERROR; 1541 break; 1542 } 1543 kwd->key = key; 1544 kwd->value = type; 1545 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1546 uprv_free(kwd); 1547 *status = U_ILLEGAL_ARGUMENT_ERROR; 1548 break; 1549 } 1550 } 1551 } 1552 1553 if (U_SUCCESS(*status)) { 1554 type = ultag_getPrivateUse(langtag); 1555 if ((int32_t)uprv_strlen(type) > 0) { 1556 /* add private use as a keyword */ 1557 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1558 if (kwd == NULL) { 1559 *status = U_MEMORY_ALLOCATION_ERROR; 1560 } else { 1561 kwd->key = PRIVATEUSE_KEY; 1562 kwd->value = type; 1563 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1564 uprv_free(kwd); 1565 *status = U_ILLEGAL_ARGUMENT_ERROR; 1566 } 1567 } 1568 } 1569 } 1570 1571 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ 1572 1573 if (U_SUCCESS(*status) && posixVariant) { 1574 len = (int32_t) uprv_strlen(_POSIX); 1575 if (reslen < capacity) { 1576 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); 1577 } 1578 reslen += len; 1579 } 1580 1581 if (U_SUCCESS(*status) && kwdFirst != NULL) { 1582 /* write out the sorted keywords */ 1583 UBool firstValue = TRUE; 1584 kwd = kwdFirst; 1585 do { 1586 if (reslen < capacity) { 1587 if (firstValue) { 1588 /* '@' */ 1589 *(appendAt + reslen) = LOCALE_EXT_SEP; 1590 firstValue = FALSE; 1591 } else { 1592 /* ';' */ 1593 *(appendAt + reslen) = LOCALE_KEYWORD_SEP; 1594 } 1595 } 1596 reslen++; 1597 1598 /* key */ 1599 len = (int32_t)uprv_strlen(kwd->key); 1600 if (reslen < capacity) { 1601 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); 1602 } 1603 reslen += len; 1604 1605 /* '=' */ 1606 if (reslen < capacity) { 1607 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; 1608 } 1609 reslen++; 1610 1611 /* type */ 1612 len = (int32_t)uprv_strlen(kwd->value); 1613 if (reslen < capacity) { 1614 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); 1615 } 1616 reslen += len; 1617 1618 kwd = kwd->next; 1619 } while (kwd); 1620 } 1621 1622 /* clean up */ 1623 kwd = kwdFirst; 1624 while (kwd != NULL) { 1625 ExtensionListEntry *tmpKwd = kwd->next; 1626 uprv_free(kwd); 1627 kwd = tmpKwd; 1628 } 1629 1630 uprv_free(kwdBuf); 1631 1632 if (U_FAILURE(*status)) { 1633 return 0; 1634 } 1635 1636 return u_terminateChars(appendAt, capacity, reslen, status); 1637} 1638 1639static int32_t 1640_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { 1641 char buf[ULOC_FULLNAME_CAPACITY]; 1642 char tmpAppend[ULOC_FULLNAME_CAPACITY]; 1643 UErrorCode tmpStatus = U_ZERO_ERROR; 1644 int32_t len, i; 1645 int32_t reslen = 0; 1646 1647 if (U_FAILURE(*status)) { 1648 return 0; 1649 } 1650 1651 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 1652 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1653 if (strict) { 1654 *status = U_ILLEGAL_ARGUMENT_ERROR; 1655 } 1656 return 0; 1657 } 1658 1659 if (len > 0) { 1660 char *p, *pPriv; 1661 UBool bNext = TRUE; 1662 UBool firstValue = TRUE; 1663 UBool writeValue; 1664 1665 pPriv = NULL; 1666 p = buf; 1667 while (bNext) { 1668 writeValue = FALSE; 1669 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 1670 if (*p == 0) { 1671 bNext = FALSE; 1672 } else { 1673 *p = 0; /* terminate */ 1674 } 1675 if (pPriv != NULL) { 1676 /* Private use in the canonical format is lowercase in BCP47 */ 1677 for (i = 0; *(pPriv + i) != 0; i++) { 1678 *(pPriv + i) = uprv_tolower(*(pPriv + i)); 1679 } 1680 1681 /* validate */ 1682 if (_isPrivateuseValueSubtag(pPriv, -1)) { 1683 if (firstValue) { 1684 if (!_isVariantSubtag(pPriv, -1)) { 1685 writeValue = TRUE; 1686 } 1687 } else { 1688 writeValue = TRUE; 1689 } 1690 } else if (strict) { 1691 *status = U_ILLEGAL_ARGUMENT_ERROR; 1692 break; 1693 } else { 1694 break; 1695 } 1696 1697 if (writeValue) { 1698 if (reslen < capacity) { 1699 tmpAppend[reslen++] = SEP; 1700 } 1701 1702 if (firstValue) { 1703 if (reslen < capacity) { 1704 tmpAppend[reslen++] = *PRIVATEUSE_KEY; 1705 } 1706 1707 if (reslen < capacity) { 1708 tmpAppend[reslen++] = SEP; 1709 } 1710 1711 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); 1712 if (reslen < capacity) { 1713 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); 1714 } 1715 reslen += len; 1716 1717 if (reslen < capacity) { 1718 tmpAppend[reslen++] = SEP; 1719 } 1720 1721 firstValue = FALSE; 1722 } 1723 1724 len = (int32_t)uprv_strlen(pPriv); 1725 if (reslen < capacity) { 1726 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); 1727 } 1728 reslen += len; 1729 } 1730 } 1731 /* reset private use starting position */ 1732 pPriv = NULL; 1733 } else if (pPriv == NULL) { 1734 pPriv = p; 1735 } 1736 p++; 1737 } 1738 1739 if (U_FAILURE(*status)) { 1740 return 0; 1741 } 1742 } 1743 1744 if (U_SUCCESS(*status)) { 1745 len = reslen; 1746 if (reslen < capacity) { 1747 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); 1748 } 1749 } 1750 1751 u_terminateChars(appendAt, capacity, reslen, status); 1752 1753 return reslen; 1754} 1755 1756/* 1757* ------------------------------------------------- 1758* 1759* ultag_ functions 1760* 1761* ------------------------------------------------- 1762*/ 1763 1764/* Bit flags used by the parser */ 1765#define LANG 0x0001 1766#define EXTL 0x0002 1767#define SCRT 0x0004 1768#define REGN 0x0008 1769#define VART 0x0010 1770#define EXTS 0x0020 1771#define EXTV 0x0040 1772#define PRIV 0x0080 1773 1774static ULanguageTag* 1775ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { 1776 ULanguageTag *t; 1777 char *tagBuf; 1778 int16_t next; 1779 char *pSubtag, *pNext, *pLastGoodPosition; 1780 int32_t subtagLen; 1781 int32_t extlangIdx; 1782 ExtensionListEntry *pExtension; 1783 char *pExtValueSubtag, *pExtValueSubtagEnd; 1784 int32_t i; 1785 UBool privateuseVar = FALSE; 1786 int32_t grandfatheredLen = 0; 1787 1788 if (parsedLen != NULL) { 1789 *parsedLen = 0; 1790 } 1791 1792 if (U_FAILURE(*status)) { 1793 return NULL; 1794 } 1795 1796 if (tagLen < 0) { 1797 tagLen = (int32_t)uprv_strlen(tag); 1798 } 1799 1800 /* copy the entire string */ 1801 tagBuf = (char*)uprv_malloc(tagLen + 1); 1802 if (tagBuf == NULL) { 1803 *status = U_MEMORY_ALLOCATION_ERROR; 1804 return NULL; 1805 } 1806 uprv_memcpy(tagBuf, tag, tagLen); 1807 *(tagBuf + tagLen) = 0; 1808 1809 /* create a ULanguageTag */ 1810 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); 1811 if (t == NULL) { 1812 uprv_free(tagBuf); 1813 *status = U_MEMORY_ALLOCATION_ERROR; 1814 return NULL; 1815 } 1816 _initializeULanguageTag(t); 1817 t->buf = tagBuf; 1818 1819 if (tagLen < MINLEN) { 1820 /* the input tag is too short - return empty ULanguageTag */ 1821 return t; 1822 } 1823 1824 /* check if the tag is grandfathered */ 1825 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { 1826 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { 1827 int32_t newTagLength; 1828 1829 grandfatheredLen = tagLen; /* back up for output parsedLen */ 1830 newTagLength = uprv_strlen(GRANDFATHERED[i+1]); 1831 if (tagLen < newTagLength) { 1832 uprv_free(tagBuf); 1833 tagBuf = (char*)uprv_malloc(newTagLength + 1); 1834 if (tagBuf == NULL) { 1835 *status = U_MEMORY_ALLOCATION_ERROR; 1836 return NULL; 1837 } 1838 t->buf = tagBuf; 1839 tagLen = newTagLength; 1840 } 1841 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); 1842 break; 1843 } 1844 } 1845 1846 /* 1847 * langtag = language 1848 * ["-" script] 1849 * ["-" region] 1850 * *("-" variant) 1851 * *("-" extension) 1852 * ["-" privateuse] 1853 */ 1854 1855 next = LANG | PRIV; 1856 pNext = pLastGoodPosition = tagBuf; 1857 extlangIdx = 0; 1858 pExtension = NULL; 1859 pExtValueSubtag = NULL; 1860 pExtValueSubtagEnd = NULL; 1861 1862 while (pNext) { 1863 char *pSep; 1864 1865 pSubtag = pNext; 1866 1867 /* locate next separator char */ 1868 pSep = pSubtag; 1869 while (*pSep) { 1870 if (*pSep == SEP) { 1871 break; 1872 } 1873 pSep++; 1874 } 1875 if (*pSep == 0) { 1876 /* last subtag */ 1877 pNext = NULL; 1878 } else { 1879 pNext = pSep + 1; 1880 } 1881 subtagLen = (int32_t)(pSep - pSubtag); 1882 1883 if (next & LANG) { 1884 if (_isLanguageSubtag(pSubtag, subtagLen)) { 1885 *pSep = 0; /* terminate */ 1886 t->language = T_CString_toLowerCase(pSubtag); 1887 1888 pLastGoodPosition = pSep; 1889 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 1890 continue; 1891 } 1892 } 1893 if (next & EXTL) { 1894 if (_isExtlangSubtag(pSubtag, subtagLen)) { 1895 *pSep = 0; 1896 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); 1897 1898 pLastGoodPosition = pSep; 1899 if (extlangIdx < 3) { 1900 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 1901 } else { 1902 next = SCRT | REGN | VART | EXTS | PRIV; 1903 } 1904 continue; 1905 } 1906 } 1907 if (next & SCRT) { 1908 if (_isScriptSubtag(pSubtag, subtagLen)) { 1909 char *p = pSubtag; 1910 1911 *pSep = 0; 1912 1913 /* to title case */ 1914 *p = uprv_toupper(*p); 1915 p++; 1916 for (; *p; p++) { 1917 *p = uprv_tolower(*p); 1918 } 1919 1920 t->script = pSubtag; 1921 1922 pLastGoodPosition = pSep; 1923 next = REGN | VART | EXTS | PRIV; 1924 continue; 1925 } 1926 } 1927 if (next & REGN) { 1928 if (_isRegionSubtag(pSubtag, subtagLen)) { 1929 *pSep = 0; 1930 t->region = T_CString_toUpperCase(pSubtag); 1931 1932 pLastGoodPosition = pSep; 1933 next = VART | EXTS | PRIV; 1934 continue; 1935 } 1936 } 1937 if (next & VART) { 1938 if (_isVariantSubtag(pSubtag, subtagLen) || 1939 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { 1940 VariantListEntry *var; 1941 UBool isAdded; 1942 1943 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); 1944 if (var == NULL) { 1945 *status = U_MEMORY_ALLOCATION_ERROR; 1946 goto error; 1947 } 1948 *pSep = 0; 1949 var->variant = T_CString_toUpperCase(pSubtag); 1950 isAdded = _addVariantToList(&(t->variants), var); 1951 if (!isAdded) { 1952 /* duplicated variant entry */ 1953 uprv_free(var); 1954 break; 1955 } 1956 pLastGoodPosition = pSep; 1957 next = VART | EXTS | PRIV; 1958 continue; 1959 } 1960 } 1961 if (next & EXTS) { 1962 if (_isExtensionSingleton(pSubtag, subtagLen)) { 1963 if (pExtension != NULL) { 1964 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 1965 /* the previous extension is incomplete */ 1966 uprv_free(pExtension); 1967 pExtension = NULL; 1968 break; 1969 } 1970 1971 /* terminate the previous extension value */ 1972 *pExtValueSubtagEnd = 0; 1973 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 1974 1975 /* insert the extension to the list */ 1976 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 1977 pLastGoodPosition = pExtValueSubtagEnd; 1978 } else { 1979 /* stop parsing here */ 1980 uprv_free(pExtension); 1981 pExtension = NULL; 1982 break; 1983 } 1984 } 1985 1986 /* create a new extension */ 1987 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1988 if (pExtension == NULL) { 1989 *status = U_MEMORY_ALLOCATION_ERROR; 1990 goto error; 1991 } 1992 *pSep = 0; 1993 pExtension->key = T_CString_toLowerCase(pSubtag); 1994 pExtension->value = NULL; /* will be set later */ 1995 1996 /* 1997 * reset the start and the end location of extension value 1998 * subtags for this extension 1999 */ 2000 pExtValueSubtag = NULL; 2001 pExtValueSubtagEnd = NULL; 2002 2003 next = EXTV; 2004 continue; 2005 } 2006 } 2007 if (next & EXTV) { 2008 if (_isExtensionSubtag(pSubtag, subtagLen)) { 2009 if (pExtValueSubtag == NULL) { 2010 /* if the start postion of this extension's value is not yet, 2011 this one is the first value subtag */ 2012 pExtValueSubtag = pSubtag; 2013 } 2014 2015 /* Mark the end of this subtag */ 2016 pExtValueSubtagEnd = pSep; 2017 next = EXTS | EXTV | PRIV; 2018 2019 continue; 2020 } 2021 } 2022 if (next & PRIV) { 2023 if (uprv_tolower(*pSubtag) == PRIVATEUSE) { 2024 char *pPrivuseVal; 2025 2026 if (pExtension != NULL) { 2027 /* Process the last extension */ 2028 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2029 /* the previous extension is incomplete */ 2030 uprv_free(pExtension); 2031 pExtension = NULL; 2032 break; 2033 } else { 2034 /* terminate the previous extension value */ 2035 *pExtValueSubtagEnd = 0; 2036 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2037 2038 /* insert the extension to the list */ 2039 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2040 pLastGoodPosition = pExtValueSubtagEnd; 2041 pExtension = NULL; 2042 } else { 2043 /* stop parsing here */ 2044 uprv_free(pExtension); 2045 pExtension = NULL; 2046 break; 2047 } 2048 } 2049 } 2050 2051 /* The rest of part will be private use value subtags */ 2052 if (pNext == NULL) { 2053 /* empty private use subtag */ 2054 break; 2055 } 2056 /* back up the private use value start position */ 2057 pPrivuseVal = pNext; 2058 2059 /* validate private use value subtags */ 2060 while (pNext) { 2061 pSubtag = pNext; 2062 pSep = pSubtag; 2063 while (*pSep) { 2064 if (*pSep == SEP) { 2065 break; 2066 } 2067 pSep++; 2068 } 2069 if (*pSep == 0) { 2070 /* last subtag */ 2071 pNext = NULL; 2072 } else { 2073 pNext = pSep + 1; 2074 } 2075 subtagLen = (int32_t)(pSep - pSubtag); 2076 2077 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { 2078 *pSep = 0; 2079 next = VART; 2080 privateuseVar = TRUE; 2081 break; 2082 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { 2083 pLastGoodPosition = pSep; 2084 } else { 2085 break; 2086 } 2087 } 2088 2089 if (next == VART) { 2090 continue; 2091 } 2092 2093 if (pLastGoodPosition - pPrivuseVal > 0) { 2094 *pLastGoodPosition = 0; 2095 t->privateuse = T_CString_toLowerCase(pPrivuseVal); 2096 } 2097 /* No more subtags, exiting the parse loop */ 2098 break; 2099 } 2100 break; 2101 } 2102 2103 /* If we fell through here, it means this subtag is illegal - quit parsing */ 2104 break; 2105 } 2106 2107 if (pExtension != NULL) { 2108 /* Process the last extension */ 2109 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2110 /* the previous extension is incomplete */ 2111 uprv_free(pExtension); 2112 } else { 2113 /* terminate the previous extension value */ 2114 *pExtValueSubtagEnd = 0; 2115 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2116 /* insert the extension to the list */ 2117 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2118 pLastGoodPosition = pExtValueSubtagEnd; 2119 } else { 2120 uprv_free(pExtension); 2121 } 2122 } 2123 } 2124 2125 if (parsedLen != NULL) { 2126 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); 2127 } 2128 2129 return t; 2130 2131error: 2132 uprv_free(t); 2133 return NULL; 2134} 2135 2136static void 2137ultag_close(ULanguageTag* langtag) { 2138 2139 if (langtag == NULL) { 2140 return; 2141 } 2142 2143 uprv_free(langtag->buf); 2144 2145 if (langtag->variants) { 2146 VariantListEntry *curVar = langtag->variants; 2147 while (curVar) { 2148 VariantListEntry *nextVar = curVar->next; 2149 uprv_free(curVar); 2150 curVar = nextVar; 2151 } 2152 } 2153 2154 if (langtag->extensions) { 2155 ExtensionListEntry *curExt = langtag->extensions; 2156 while (curExt) { 2157 ExtensionListEntry *nextExt = curExt->next; 2158 uprv_free(curExt); 2159 curExt = nextExt; 2160 } 2161 } 2162 2163 uprv_free(langtag); 2164} 2165 2166static const char* 2167ultag_getLanguage(const ULanguageTag* langtag) { 2168 return langtag->language; 2169} 2170 2171#if 0 2172static const char* 2173ultag_getJDKLanguage(const ULanguageTag* langtag) { 2174 int32_t i; 2175 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 2176 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { 2177 return DEPRECATEDLANGS[i + 1]; 2178 } 2179 } 2180 return langtag->language; 2181} 2182#endif 2183 2184static const char* 2185ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { 2186 if (idx >= 0 && idx < MAXEXTLANG) { 2187 return langtag->extlang[idx]; 2188 } 2189 return NULL; 2190} 2191 2192static int32_t 2193ultag_getExtlangSize(const ULanguageTag* langtag) { 2194 int32_t size = 0; 2195 int32_t i; 2196 for (i = 0; i < MAXEXTLANG; i++) { 2197 if (langtag->extlang[i]) { 2198 size++; 2199 } 2200 } 2201 return size; 2202} 2203 2204static const char* 2205ultag_getScript(const ULanguageTag* langtag) { 2206 return langtag->script; 2207} 2208 2209static const char* 2210ultag_getRegion(const ULanguageTag* langtag) { 2211 return langtag->region; 2212} 2213 2214static const char* 2215ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { 2216 const char *var = NULL; 2217 VariantListEntry *cur = langtag->variants; 2218 int32_t i = 0; 2219 while (cur) { 2220 if (i == idx) { 2221 var = cur->variant; 2222 break; 2223 } 2224 cur = cur->next; 2225 i++; 2226 } 2227 return var; 2228} 2229 2230static int32_t 2231ultag_getVariantsSize(const ULanguageTag* langtag) { 2232 int32_t size = 0; 2233 VariantListEntry *cur = langtag->variants; 2234 while (TRUE) { 2235 if (cur == NULL) { 2236 break; 2237 } 2238 size++; 2239 cur = cur->next; 2240 } 2241 return size; 2242} 2243 2244static const char* 2245ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { 2246 const char *key = NULL; 2247 ExtensionListEntry *cur = langtag->extensions; 2248 int32_t i = 0; 2249 while (cur) { 2250 if (i == idx) { 2251 key = cur->key; 2252 break; 2253 } 2254 cur = cur->next; 2255 i++; 2256 } 2257 return key; 2258} 2259 2260static const char* 2261ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { 2262 const char *val = NULL; 2263 ExtensionListEntry *cur = langtag->extensions; 2264 int32_t i = 0; 2265 while (cur) { 2266 if (i == idx) { 2267 val = cur->value; 2268 break; 2269 } 2270 cur = cur->next; 2271 i++; 2272 } 2273 return val; 2274} 2275 2276static int32_t 2277ultag_getExtensionsSize(const ULanguageTag* langtag) { 2278 int32_t size = 0; 2279 ExtensionListEntry *cur = langtag->extensions; 2280 while (TRUE) { 2281 if (cur == NULL) { 2282 break; 2283 } 2284 size++; 2285 cur = cur->next; 2286 } 2287 return size; 2288} 2289 2290static const char* 2291ultag_getPrivateUse(const ULanguageTag* langtag) { 2292 return langtag->privateuse; 2293} 2294 2295#if 0 2296static const char* 2297ultag_getGrandfathered(const ULanguageTag* langtag) { 2298 return langtag->grandfathered; 2299} 2300#endif 2301 2302 2303/* 2304* ------------------------------------------------- 2305* 2306* Locale/BCP47 conversion APIs, exposed as uloc_* 2307* 2308* ------------------------------------------------- 2309*/ 2310U_CAPI int32_t U_EXPORT2 2311uloc_toLanguageTag(const char* localeID, 2312 char* langtag, 2313 int32_t langtagCapacity, 2314 UBool strict, 2315 UErrorCode* status) { 2316 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ 2317 char canonical[256]; 2318 int32_t reslen = 0; 2319 UErrorCode tmpStatus = U_ZERO_ERROR; 2320 UBool hadPosix = FALSE; 2321 const char* pKeywordStart; 2322 2323 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ 2324 canonical[0] = 0; 2325 if (uprv_strlen(localeID) > 0) { 2326 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); 2327 if (tmpStatus != U_ZERO_ERROR) { 2328 *status = U_ILLEGAL_ARGUMENT_ERROR; 2329 return 0; 2330 } 2331 } 2332 2333 /* For handling special case - private use only tag */ 2334 pKeywordStart = locale_getKeywordsStart(canonical); 2335 if (pKeywordStart == canonical) { 2336 UEnumeration *kwdEnum; 2337 int kwdCnt = 0; 2338 UBool done = FALSE; 2339 2340 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); 2341 if (kwdEnum != NULL) { 2342 kwdCnt = uenum_count(kwdEnum, &tmpStatus); 2343 if (kwdCnt == 1) { 2344 const char *key; 2345 int32_t len = 0; 2346 2347 key = uenum_next(kwdEnum, &len, &tmpStatus); 2348 if (len == 1 && *key == PRIVATEUSE) { 2349 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 2350 buf[0] = PRIVATEUSE; 2351 buf[1] = SEP; 2352 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); 2353 if (U_SUCCESS(tmpStatus)) { 2354 if (_isPrivateuseValueSubtags(&buf[2], len)) { 2355 /* return private use only tag */ 2356 reslen = len + 2; 2357 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); 2358 u_terminateChars(langtag, langtagCapacity, reslen, status); 2359 done = TRUE; 2360 } else if (strict) { 2361 *status = U_ILLEGAL_ARGUMENT_ERROR; 2362 done = TRUE; 2363 } 2364 /* if not strict mode, then "und" will be returned */ 2365 } else { 2366 *status = U_ILLEGAL_ARGUMENT_ERROR; 2367 done = TRUE; 2368 } 2369 } 2370 } 2371 uenum_close(kwdEnum); 2372 if (done) { 2373 return reslen; 2374 } 2375 } 2376 } 2377 2378 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); 2379 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2380 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2381 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); 2382 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2383 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2384 2385 return reslen; 2386} 2387 2388 2389U_CAPI int32_t U_EXPORT2 2390uloc_forLanguageTag(const char* langtag, 2391 char* localeID, 2392 int32_t localeIDCapacity, 2393 int32_t* parsedLength, 2394 UErrorCode* status) { 2395 ULanguageTag *lt; 2396 int32_t reslen = 0; 2397 const char *subtag, *p; 2398 int32_t len; 2399 int32_t i, n; 2400 UBool noRegion = TRUE; 2401 2402 lt = ultag_parse(langtag, -1, parsedLength, status); 2403 if (U_FAILURE(*status)) { 2404 return 0; 2405 } 2406 2407 /* language */ 2408 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); 2409 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { 2410 len = (int32_t)uprv_strlen(subtag); 2411 if (len > 0) { 2412 if (reslen < localeIDCapacity) { 2413 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); 2414 } 2415 reslen += len; 2416 } 2417 } 2418 2419 /* script */ 2420 subtag = ultag_getScript(lt); 2421 len = (int32_t)uprv_strlen(subtag); 2422 if (len > 0) { 2423 if (reslen < localeIDCapacity) { 2424 *(localeID + reslen) = LOCALE_SEP; 2425 } 2426 reslen++; 2427 2428 /* write out the script in title case */ 2429 p = subtag; 2430 while (*p) { 2431 if (reslen < localeIDCapacity) { 2432 if (p == subtag) { 2433 *(localeID + reslen) = uprv_toupper(*p); 2434 } else { 2435 *(localeID + reslen) = *p; 2436 } 2437 } 2438 reslen++; 2439 p++; 2440 } 2441 } 2442 2443 /* region */ 2444 subtag = ultag_getRegion(lt); 2445 len = (int32_t)uprv_strlen(subtag); 2446 if (len > 0) { 2447 if (reslen < localeIDCapacity) { 2448 *(localeID + reslen) = LOCALE_SEP; 2449 } 2450 reslen++; 2451 /* write out the retion in upper case */ 2452 p = subtag; 2453 while (*p) { 2454 if (reslen < localeIDCapacity) { 2455 *(localeID + reslen) = uprv_toupper(*p); 2456 } 2457 reslen++; 2458 p++; 2459 } 2460 noRegion = FALSE; 2461 } 2462 2463 /* variants */ 2464 n = ultag_getVariantsSize(lt); 2465 if (n > 0) { 2466 if (noRegion) { 2467 if (reslen < localeIDCapacity) { 2468 *(localeID + reslen) = LOCALE_SEP; 2469 } 2470 reslen++; 2471 } 2472 2473 for (i = 0; i < n; i++) { 2474 subtag = ultag_getVariant(lt, i); 2475 if (reslen < localeIDCapacity) { 2476 *(localeID + reslen) = LOCALE_SEP; 2477 } 2478 reslen++; 2479 /* write out the variant in upper case */ 2480 p = subtag; 2481 while (*p) { 2482 if (reslen < localeIDCapacity) { 2483 *(localeID + reslen) = uprv_toupper(*p); 2484 } 2485 reslen++; 2486 p++; 2487 } 2488 } 2489 } 2490 2491 /* keywords */ 2492 n = ultag_getExtensionsSize(lt); 2493 subtag = ultag_getPrivateUse(lt); 2494 if (n > 0 || uprv_strlen(subtag) > 0) { 2495 if (reslen == 0 && n > 0) { 2496 /* need a language */ 2497 if (reslen < localeIDCapacity) { 2498 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); 2499 } 2500 reslen += LANG_UND_LEN; 2501 } 2502 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); 2503 reslen += len; 2504 } 2505 2506 ultag_close(lt); 2507 return u_terminateChars(localeID, localeIDCapacity, reslen, status); 2508} 2509 2510 2511