1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* 6* Copyright (C) 1997-2016, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9******************************************************************************* 10* file name: loclikely.cpp 11* encoding: UTF-8 12* tab size: 8 (not used) 13* indentation:4 14* 15* created on: 2010feb25 16* created by: Markus W. Scherer 17* 18* Code for likely and minimized locale subtags, separated out from other .cpp files 19* that then do not depend on resource bundle code and likely-subtags data. 20*/ 21 22#include "unicode/utypes.h" 23#include "unicode/locid.h" 24#include "unicode/putil.h" 25#include "unicode/uchar.h" 26#include "unicode/uloc.h" 27#include "unicode/ures.h" 28#include "unicode/uscript.h" 29#include "cmemory.h" 30#include "cstring.h" 31#include "ulocimp.h" 32#include "ustr_imp.h" 33 34/** 35 * This function looks for the localeID in the likelySubtags resource. 36 * 37 * @param localeID The tag to find. 38 * @param buffer A buffer to hold the matching entry 39 * @param bufferLength The length of the output buffer 40 * @return A pointer to "buffer" if found, or a null pointer if not. 41 */ 42static const char* U_CALLCONV 43findLikelySubtags(const char* localeID, 44 char* buffer, 45 int32_t bufferLength, 46 UErrorCode* err) { 47 const char* result = NULL; 48 49 if (!U_FAILURE(*err)) { 50 int32_t resLen = 0; 51 const UChar* s = NULL; 52 UErrorCode tmpErr = U_ZERO_ERROR; 53 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); 54 if (U_SUCCESS(tmpErr)) { 55 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); 56 57 if (U_FAILURE(tmpErr)) { 58 /* 59 * If a resource is missing, it's not really an error, it's 60 * just that we don't have any data for that particular locale ID. 61 */ 62 if (tmpErr != U_MISSING_RESOURCE_ERROR) { 63 *err = tmpErr; 64 } 65 } 66 else if (resLen >= bufferLength) { 67 /* The buffer should never overflow. */ 68 *err = U_INTERNAL_PROGRAM_ERROR; 69 } 70 else { 71 u_UCharsToChars(s, buffer, resLen + 1); 72 result = buffer; 73 } 74 75 ures_close(subtags); 76 } else { 77 *err = tmpErr; 78 } 79 } 80 81 return result; 82} 83 84/** 85 * Append a tag to a buffer, adding the separator if necessary. The buffer 86 * must be large enough to contain the resulting tag plus any separator 87 * necessary. The tag must not be a zero-length string. 88 * 89 * @param tag The tag to add. 90 * @param tagLength The length of the tag. 91 * @param buffer The output buffer. 92 * @param bufferLength The length of the output buffer. This is an input/ouput parameter. 93 **/ 94static void U_CALLCONV 95appendTag( 96 const char* tag, 97 int32_t tagLength, 98 char* buffer, 99 int32_t* bufferLength) { 100 101 if (*bufferLength > 0) { 102 buffer[*bufferLength] = '_'; 103 ++(*bufferLength); 104 } 105 106 uprv_memmove( 107 &buffer[*bufferLength], 108 tag, 109 tagLength); 110 111 *bufferLength += tagLength; 112} 113 114/** 115 * These are the canonical strings for unknown languages, scripts and regions. 116 **/ 117static const char* const unknownLanguage = "und"; 118static const char* const unknownScript = "Zzzz"; 119static const char* const unknownRegion = "ZZ"; 120 121/** 122 * Create a tag string from the supplied parameters. The lang, script and region 123 * parameters may be NULL pointers. If they are, their corresponding length parameters 124 * must be less than or equal to 0. 125 * 126 * If any of the language, script or region parameters are empty, and the alternateTags 127 * parameter is not NULL, it will be parsed for potential language, script and region tags 128 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or 129 * it contains no language tag, the default tag for the unknown language is used. 130 * 131 * If the length of the new string exceeds the capacity of the output buffer, 132 * the function copies as many bytes to the output buffer as it can, and returns 133 * the error U_BUFFER_OVERFLOW_ERROR. 134 * 135 * If an illegal argument is provided, the function returns the error 136 * U_ILLEGAL_ARGUMENT_ERROR. 137 * 138 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if 139 * the tag string fits in the output buffer, but the null terminator doesn't. 140 * 141 * @param lang The language tag to use. 142 * @param langLength The length of the language tag. 143 * @param script The script tag to use. 144 * @param scriptLength The length of the script tag. 145 * @param region The region tag to use. 146 * @param regionLength The length of the region tag. 147 * @param trailing Any trailing data to append to the new tag. 148 * @param trailingLength The length of the trailing data. 149 * @param alternateTags A string containing any alternate tags. 150 * @param tag The output buffer. 151 * @param tagCapacity The capacity of the output buffer. 152 * @param err A pointer to a UErrorCode for error reporting. 153 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. 154 **/ 155static int32_t U_CALLCONV 156createTagStringWithAlternates( 157 const char* lang, 158 int32_t langLength, 159 const char* script, 160 int32_t scriptLength, 161 const char* region, 162 int32_t regionLength, 163 const char* trailing, 164 int32_t trailingLength, 165 const char* alternateTags, 166 char* tag, 167 int32_t tagCapacity, 168 UErrorCode* err) { 169 170 if (U_FAILURE(*err)) { 171 goto error; 172 } 173 else if (tag == NULL || 174 tagCapacity <= 0 || 175 langLength >= ULOC_LANG_CAPACITY || 176 scriptLength >= ULOC_SCRIPT_CAPACITY || 177 regionLength >= ULOC_COUNTRY_CAPACITY) { 178 goto error; 179 } 180 else { 181 /** 182 * ULOC_FULLNAME_CAPACITY will provide enough capacity 183 * that we can build a string that contains the language, 184 * script and region code without worrying about overrunning 185 * the user-supplied buffer. 186 **/ 187 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 188 int32_t tagLength = 0; 189 int32_t capacityRemaining = tagCapacity; 190 UBool regionAppended = FALSE; 191 192 if (langLength > 0) { 193 appendTag( 194 lang, 195 langLength, 196 tagBuffer, 197 &tagLength); 198 } 199 else if (alternateTags == NULL) { 200 /* 201 * Append the value for an unknown language, if 202 * we found no language. 203 */ 204 appendTag( 205 unknownLanguage, 206 (int32_t)uprv_strlen(unknownLanguage), 207 tagBuffer, 208 &tagLength); 209 } 210 else { 211 /* 212 * Parse the alternateTags string for the language. 213 */ 214 char alternateLang[ULOC_LANG_CAPACITY]; 215 int32_t alternateLangLength = sizeof(alternateLang); 216 217 alternateLangLength = 218 uloc_getLanguage( 219 alternateTags, 220 alternateLang, 221 alternateLangLength, 222 err); 223 if(U_FAILURE(*err) || 224 alternateLangLength >= ULOC_LANG_CAPACITY) { 225 goto error; 226 } 227 else if (alternateLangLength == 0) { 228 /* 229 * Append the value for an unknown language, if 230 * we found no language. 231 */ 232 appendTag( 233 unknownLanguage, 234 (int32_t)uprv_strlen(unknownLanguage), 235 tagBuffer, 236 &tagLength); 237 } 238 else { 239 appendTag( 240 alternateLang, 241 alternateLangLength, 242 tagBuffer, 243 &tagLength); 244 } 245 } 246 247 if (scriptLength > 0) { 248 appendTag( 249 script, 250 scriptLength, 251 tagBuffer, 252 &tagLength); 253 } 254 else if (alternateTags != NULL) { 255 /* 256 * Parse the alternateTags string for the script. 257 */ 258 char alternateScript[ULOC_SCRIPT_CAPACITY]; 259 260 const int32_t alternateScriptLength = 261 uloc_getScript( 262 alternateTags, 263 alternateScript, 264 sizeof(alternateScript), 265 err); 266 267 if (U_FAILURE(*err) || 268 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { 269 goto error; 270 } 271 else if (alternateScriptLength > 0) { 272 appendTag( 273 alternateScript, 274 alternateScriptLength, 275 tagBuffer, 276 &tagLength); 277 } 278 } 279 280 if (regionLength > 0) { 281 appendTag( 282 region, 283 regionLength, 284 tagBuffer, 285 &tagLength); 286 287 regionAppended = TRUE; 288 } 289 else if (alternateTags != NULL) { 290 /* 291 * Parse the alternateTags string for the region. 292 */ 293 char alternateRegion[ULOC_COUNTRY_CAPACITY]; 294 295 const int32_t alternateRegionLength = 296 uloc_getCountry( 297 alternateTags, 298 alternateRegion, 299 sizeof(alternateRegion), 300 err); 301 if (U_FAILURE(*err) || 302 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { 303 goto error; 304 } 305 else if (alternateRegionLength > 0) { 306 appendTag( 307 alternateRegion, 308 alternateRegionLength, 309 tagBuffer, 310 &tagLength); 311 312 regionAppended = TRUE; 313 } 314 } 315 316 { 317 const int32_t toCopy = 318 tagLength >= tagCapacity ? tagCapacity : tagLength; 319 320 /** 321 * Copy the partial tag from our internal buffer to the supplied 322 * target. 323 **/ 324 uprv_memcpy( 325 tag, 326 tagBuffer, 327 toCopy); 328 329 capacityRemaining -= toCopy; 330 } 331 332 if (trailingLength > 0) { 333 if (*trailing != '@' && capacityRemaining > 0) { 334 tag[tagLength++] = '_'; 335 --capacityRemaining; 336 if (capacityRemaining > 0 && !regionAppended) { 337 /* extra separator is required */ 338 tag[tagLength++] = '_'; 339 --capacityRemaining; 340 } 341 } 342 343 if (capacityRemaining > 0) { 344 /* 345 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we 346 * don't know if the user-supplied buffers overlap. 347 */ 348 const int32_t toCopy = 349 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; 350 351 uprv_memmove( 352 &tag[tagLength], 353 trailing, 354 toCopy); 355 } 356 } 357 358 tagLength += trailingLength; 359 360 return u_terminateChars( 361 tag, 362 tagCapacity, 363 tagLength, 364 err); 365 } 366 367error: 368 369 /** 370 * An overflow indicates the locale ID passed in 371 * is ill-formed. If we got here, and there was 372 * no previous error, it's an implicit overflow. 373 **/ 374 if (*err == U_BUFFER_OVERFLOW_ERROR || 375 U_SUCCESS(*err)) { 376 *err = U_ILLEGAL_ARGUMENT_ERROR; 377 } 378 379 return -1; 380} 381 382/** 383 * Create a tag string from the supplied parameters. The lang, script and region 384 * parameters may be NULL pointers. If they are, their corresponding length parameters 385 * must be less than or equal to 0. If the lang parameter is an empty string, the 386 * default value for an unknown language is written to the output buffer. 387 * 388 * If the length of the new string exceeds the capacity of the output buffer, 389 * the function copies as many bytes to the output buffer as it can, and returns 390 * the error U_BUFFER_OVERFLOW_ERROR. 391 * 392 * If an illegal argument is provided, the function returns the error 393 * U_ILLEGAL_ARGUMENT_ERROR. 394 * 395 * @param lang The language tag to use. 396 * @param langLength The length of the language tag. 397 * @param script The script tag to use. 398 * @param scriptLength The length of the script tag. 399 * @param region The region tag to use. 400 * @param regionLength The length of the region tag. 401 * @param trailing Any trailing data to append to the new tag. 402 * @param trailingLength The length of the trailing data. 403 * @param tag The output buffer. 404 * @param tagCapacity The capacity of the output buffer. 405 * @param err A pointer to a UErrorCode for error reporting. 406 * @return The length of the tag string, which may be greater than tagCapacity. 407 **/ 408static int32_t U_CALLCONV 409createTagString( 410 const char* lang, 411 int32_t langLength, 412 const char* script, 413 int32_t scriptLength, 414 const char* region, 415 int32_t regionLength, 416 const char* trailing, 417 int32_t trailingLength, 418 char* tag, 419 int32_t tagCapacity, 420 UErrorCode* err) 421{ 422 return createTagStringWithAlternates( 423 lang, 424 langLength, 425 script, 426 scriptLength, 427 region, 428 regionLength, 429 trailing, 430 trailingLength, 431 NULL, 432 tag, 433 tagCapacity, 434 err); 435} 436 437/** 438 * Parse the language, script, and region subtags from a tag string, and copy the 439 * results into the corresponding output parameters. The buffers are null-terminated, 440 * unless overflow occurs. 441 * 442 * The langLength, scriptLength, and regionLength parameters are input/output 443 * parameters, and must contain the capacity of their corresponding buffers on 444 * input. On output, they will contain the actual length of the buffers, not 445 * including the null terminator. 446 * 447 * If the length of any of the output subtags exceeds the capacity of the corresponding 448 * buffer, the function copies as many bytes to the output buffer as it can, and returns 449 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow 450 * occurs. 451 * 452 * If an illegal argument is provided, the function returns the error 453 * U_ILLEGAL_ARGUMENT_ERROR. 454 * 455 * @param localeID The locale ID to parse. 456 * @param lang The language tag buffer. 457 * @param langLength The length of the language tag. 458 * @param script The script tag buffer. 459 * @param scriptLength The length of the script tag. 460 * @param region The region tag buffer. 461 * @param regionLength The length of the region tag. 462 * @param err A pointer to a UErrorCode for error reporting. 463 * @return The number of chars of the localeID parameter consumed. 464 **/ 465static int32_t U_CALLCONV 466parseTagString( 467 const char* localeID, 468 char* lang, 469 int32_t* langLength, 470 char* script, 471 int32_t* scriptLength, 472 char* region, 473 int32_t* regionLength, 474 UErrorCode* err) 475{ 476 const char* position = localeID; 477 int32_t subtagLength = 0; 478 479 if(U_FAILURE(*err) || 480 localeID == NULL || 481 lang == NULL || 482 langLength == NULL || 483 script == NULL || 484 scriptLength == NULL || 485 region == NULL || 486 regionLength == NULL) { 487 goto error; 488 } 489 490 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); 491 u_terminateChars(lang, *langLength, subtagLength, err); 492 493 /* 494 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING 495 * to be an error, because it indicates the user-supplied tag is 496 * not well-formed. 497 */ 498 if(U_FAILURE(*err)) { 499 goto error; 500 } 501 502 *langLength = subtagLength; 503 504 /* 505 * If no language was present, use the value of unknownLanguage 506 * instead. Otherwise, move past any separator. 507 */ 508 if (*langLength == 0) { 509 uprv_strcpy( 510 lang, 511 unknownLanguage); 512 *langLength = (int32_t)uprv_strlen(lang); 513 } 514 if (_isIDSeparator(*position)) { 515 ++position; 516 } 517 518 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); 519 u_terminateChars(script, *scriptLength, subtagLength, err); 520 521 if(U_FAILURE(*err)) { 522 goto error; 523 } 524 525 *scriptLength = subtagLength; 526 527 if (*scriptLength > 0) { 528 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { 529 /** 530 * If the script part is the "unknown" script, then don't return it. 531 **/ 532 *scriptLength = 0; 533 } 534 535 /* 536 * Move past any separator. 537 */ 538 if (_isIDSeparator(*position)) { 539 ++position; 540 } 541 } 542 543 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); 544 u_terminateChars(region, *regionLength, subtagLength, err); 545 546 if(U_FAILURE(*err)) { 547 goto error; 548 } 549 550 *regionLength = subtagLength; 551 552 if (*regionLength > 0) { 553 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { 554 /** 555 * If the region part is the "unknown" region, then don't return it. 556 **/ 557 *regionLength = 0; 558 } 559 } else if (*position != 0 && *position != '@') { 560 /* back up over consumed trailing separator */ 561 --position; 562 } 563 564exit: 565 566 return (int32_t)(position - localeID); 567 568error: 569 570 /** 571 * If we get here, we have no explicit error, it's the result of an 572 * illegal argument. 573 **/ 574 if (!U_FAILURE(*err)) { 575 *err = U_ILLEGAL_ARGUMENT_ERROR; 576 } 577 578 goto exit; 579} 580 581static int32_t U_CALLCONV 582createLikelySubtagsString( 583 const char* lang, 584 int32_t langLength, 585 const char* script, 586 int32_t scriptLength, 587 const char* region, 588 int32_t regionLength, 589 const char* variants, 590 int32_t variantsLength, 591 char* tag, 592 int32_t tagCapacity, 593 UErrorCode* err) 594{ 595 /** 596 * ULOC_FULLNAME_CAPACITY will provide enough capacity 597 * that we can build a string that contains the language, 598 * script and region code without worrying about overrunning 599 * the user-supplied buffer. 600 **/ 601 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 602 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; 603 604 if(U_FAILURE(*err)) { 605 goto error; 606 } 607 608 /** 609 * Try the language with the script and region first. 610 **/ 611 if (scriptLength > 0 && regionLength > 0) { 612 613 const char* likelySubtags = NULL; 614 615 createTagString( 616 lang, 617 langLength, 618 script, 619 scriptLength, 620 region, 621 regionLength, 622 NULL, 623 0, 624 tagBuffer, 625 sizeof(tagBuffer), 626 err); 627 if(U_FAILURE(*err)) { 628 goto error; 629 } 630 631 likelySubtags = 632 findLikelySubtags( 633 tagBuffer, 634 likelySubtagsBuffer, 635 sizeof(likelySubtagsBuffer), 636 err); 637 if(U_FAILURE(*err)) { 638 goto error; 639 } 640 641 if (likelySubtags != NULL) { 642 /* Always use the language tag from the 643 maximal string, since it may be more 644 specific than the one provided. */ 645 return createTagStringWithAlternates( 646 NULL, 647 0, 648 NULL, 649 0, 650 NULL, 651 0, 652 variants, 653 variantsLength, 654 likelySubtags, 655 tag, 656 tagCapacity, 657 err); 658 } 659 } 660 661 /** 662 * Try the language with just the script. 663 **/ 664 if (scriptLength > 0) { 665 666 const char* likelySubtags = NULL; 667 668 createTagString( 669 lang, 670 langLength, 671 script, 672 scriptLength, 673 NULL, 674 0, 675 NULL, 676 0, 677 tagBuffer, 678 sizeof(tagBuffer), 679 err); 680 if(U_FAILURE(*err)) { 681 goto error; 682 } 683 684 likelySubtags = 685 findLikelySubtags( 686 tagBuffer, 687 likelySubtagsBuffer, 688 sizeof(likelySubtagsBuffer), 689 err); 690 if(U_FAILURE(*err)) { 691 goto error; 692 } 693 694 if (likelySubtags != NULL) { 695 /* Always use the language tag from the 696 maximal string, since it may be more 697 specific than the one provided. */ 698 return createTagStringWithAlternates( 699 NULL, 700 0, 701 NULL, 702 0, 703 region, 704 regionLength, 705 variants, 706 variantsLength, 707 likelySubtags, 708 tag, 709 tagCapacity, 710 err); 711 } 712 } 713 714 /** 715 * Try the language with just the region. 716 **/ 717 if (regionLength > 0) { 718 719 const char* likelySubtags = NULL; 720 721 createTagString( 722 lang, 723 langLength, 724 NULL, 725 0, 726 region, 727 regionLength, 728 NULL, 729 0, 730 tagBuffer, 731 sizeof(tagBuffer), 732 err); 733 if(U_FAILURE(*err)) { 734 goto error; 735 } 736 737 likelySubtags = 738 findLikelySubtags( 739 tagBuffer, 740 likelySubtagsBuffer, 741 sizeof(likelySubtagsBuffer), 742 err); 743 if(U_FAILURE(*err)) { 744 goto error; 745 } 746 747 if (likelySubtags != NULL) { 748 /* Always use the language tag from the 749 maximal string, since it may be more 750 specific than the one provided. */ 751 return createTagStringWithAlternates( 752 NULL, 753 0, 754 script, 755 scriptLength, 756 NULL, 757 0, 758 variants, 759 variantsLength, 760 likelySubtags, 761 tag, 762 tagCapacity, 763 err); 764 } 765 } 766 767 /** 768 * Finally, try just the language. 769 **/ 770 { 771 const char* likelySubtags = NULL; 772 773 createTagString( 774 lang, 775 langLength, 776 NULL, 777 0, 778 NULL, 779 0, 780 NULL, 781 0, 782 tagBuffer, 783 sizeof(tagBuffer), 784 err); 785 if(U_FAILURE(*err)) { 786 goto error; 787 } 788 789 likelySubtags = 790 findLikelySubtags( 791 tagBuffer, 792 likelySubtagsBuffer, 793 sizeof(likelySubtagsBuffer), 794 err); 795 if(U_FAILURE(*err)) { 796 goto error; 797 } 798 799 if (likelySubtags != NULL) { 800 /* Always use the language tag from the 801 maximal string, since it may be more 802 specific than the one provided. */ 803 return createTagStringWithAlternates( 804 NULL, 805 0, 806 script, 807 scriptLength, 808 region, 809 regionLength, 810 variants, 811 variantsLength, 812 likelySubtags, 813 tag, 814 tagCapacity, 815 err); 816 } 817 } 818 819 return u_terminateChars( 820 tag, 821 tagCapacity, 822 0, 823 err); 824 825error: 826 827 if (!U_FAILURE(*err)) { 828 *err = U_ILLEGAL_ARGUMENT_ERROR; 829 } 830 831 return -1; 832} 833 834#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ 835 { int32_t count = 0; \ 836 int32_t i; \ 837 for (i = 0; i < trailingLength; i++) { \ 838 if (trailing[i] == '-' || trailing[i] == '_') { \ 839 count = 0; \ 840 if (count > 8) { \ 841 goto error; \ 842 } \ 843 } else if (trailing[i] == '@') { \ 844 break; \ 845 } else if (count > 8) { \ 846 goto error; \ 847 } else { \ 848 count++; \ 849 } \ 850 } \ 851 } 852 853static int32_t 854_uloc_addLikelySubtags(const char* localeID, 855 char* maximizedLocaleID, 856 int32_t maximizedLocaleIDCapacity, 857 UErrorCode* err) 858{ 859 char lang[ULOC_LANG_CAPACITY]; 860 int32_t langLength = sizeof(lang); 861 char script[ULOC_SCRIPT_CAPACITY]; 862 int32_t scriptLength = sizeof(script); 863 char region[ULOC_COUNTRY_CAPACITY]; 864 int32_t regionLength = sizeof(region); 865 const char* trailing = ""; 866 int32_t trailingLength = 0; 867 int32_t trailingIndex = 0; 868 int32_t resultLength = 0; 869 870 if(U_FAILURE(*err)) { 871 goto error; 872 } 873 else if (localeID == NULL || 874 maximizedLocaleID == NULL || 875 maximizedLocaleIDCapacity <= 0) { 876 goto error; 877 } 878 879 trailingIndex = parseTagString( 880 localeID, 881 lang, 882 &langLength, 883 script, 884 &scriptLength, 885 region, 886 ®ionLength, 887 err); 888 if(U_FAILURE(*err)) { 889 /* Overflow indicates an illegal argument error */ 890 if (*err == U_BUFFER_OVERFLOW_ERROR) { 891 *err = U_ILLEGAL_ARGUMENT_ERROR; 892 } 893 894 goto error; 895 } 896 897 /* Find the length of the trailing portion. */ 898 while (_isIDSeparator(localeID[trailingIndex])) { 899 trailingIndex++; 900 } 901 trailing = &localeID[trailingIndex]; 902 trailingLength = (int32_t)uprv_strlen(trailing); 903 904 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 905 906 resultLength = 907 createLikelySubtagsString( 908 lang, 909 langLength, 910 script, 911 scriptLength, 912 region, 913 regionLength, 914 trailing, 915 trailingLength, 916 maximizedLocaleID, 917 maximizedLocaleIDCapacity, 918 err); 919 920 if (resultLength == 0) { 921 const int32_t localIDLength = (int32_t)uprv_strlen(localeID); 922 923 /* 924 * If we get here, we need to return localeID. 925 */ 926 uprv_memcpy( 927 maximizedLocaleID, 928 localeID, 929 localIDLength <= maximizedLocaleIDCapacity ? 930 localIDLength : maximizedLocaleIDCapacity); 931 932 resultLength = 933 u_terminateChars( 934 maximizedLocaleID, 935 maximizedLocaleIDCapacity, 936 localIDLength, 937 err); 938 } 939 940 return resultLength; 941 942error: 943 944 if (!U_FAILURE(*err)) { 945 *err = U_ILLEGAL_ARGUMENT_ERROR; 946 } 947 948 return -1; 949} 950 951static int32_t 952_uloc_minimizeSubtags(const char* localeID, 953 char* minimizedLocaleID, 954 int32_t minimizedLocaleIDCapacity, 955 UErrorCode* err) 956{ 957 /** 958 * ULOC_FULLNAME_CAPACITY will provide enough capacity 959 * that we can build a string that contains the language, 960 * script and region code without worrying about overrunning 961 * the user-supplied buffer. 962 **/ 963 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; 964 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); 965 966 char lang[ULOC_LANG_CAPACITY]; 967 int32_t langLength = sizeof(lang); 968 char script[ULOC_SCRIPT_CAPACITY]; 969 int32_t scriptLength = sizeof(script); 970 char region[ULOC_COUNTRY_CAPACITY]; 971 int32_t regionLength = sizeof(region); 972 const char* trailing = ""; 973 int32_t trailingLength = 0; 974 int32_t trailingIndex = 0; 975 976 if(U_FAILURE(*err)) { 977 goto error; 978 } 979 else if (localeID == NULL || 980 minimizedLocaleID == NULL || 981 minimizedLocaleIDCapacity <= 0) { 982 goto error; 983 } 984 985 trailingIndex = 986 parseTagString( 987 localeID, 988 lang, 989 &langLength, 990 script, 991 &scriptLength, 992 region, 993 ®ionLength, 994 err); 995 if(U_FAILURE(*err)) { 996 997 /* Overflow indicates an illegal argument error */ 998 if (*err == U_BUFFER_OVERFLOW_ERROR) { 999 *err = U_ILLEGAL_ARGUMENT_ERROR; 1000 } 1001 1002 goto error; 1003 } 1004 1005 /* Find the spot where the variants or the keywords begin, if any. */ 1006 while (_isIDSeparator(localeID[trailingIndex])) { 1007 trailingIndex++; 1008 } 1009 trailing = &localeID[trailingIndex]; 1010 trailingLength = (int32_t)uprv_strlen(trailing); 1011 1012 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 1013 1014 createTagString( 1015 lang, 1016 langLength, 1017 script, 1018 scriptLength, 1019 region, 1020 regionLength, 1021 NULL, 1022 0, 1023 maximizedTagBuffer, 1024 maximizedTagBufferLength, 1025 err); 1026 if(U_FAILURE(*err)) { 1027 goto error; 1028 } 1029 1030 /** 1031 * First, we need to first get the maximization 1032 * from AddLikelySubtags. 1033 **/ 1034 maximizedTagBufferLength = 1035 uloc_addLikelySubtags( 1036 maximizedTagBuffer, 1037 maximizedTagBuffer, 1038 maximizedTagBufferLength, 1039 err); 1040 1041 if(U_FAILURE(*err)) { 1042 goto error; 1043 } 1044 1045 /** 1046 * Start first with just the language. 1047 **/ 1048 { 1049 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1050 1051 const int32_t tagBufferLength = 1052 createLikelySubtagsString( 1053 lang, 1054 langLength, 1055 NULL, 1056 0, 1057 NULL, 1058 0, 1059 NULL, 1060 0, 1061 tagBuffer, 1062 sizeof(tagBuffer), 1063 err); 1064 1065 if(U_FAILURE(*err)) { 1066 goto error; 1067 } 1068 else if (uprv_strnicmp( 1069 maximizedTagBuffer, 1070 tagBuffer, 1071 tagBufferLength) == 0) { 1072 1073 return createTagString( 1074 lang, 1075 langLength, 1076 NULL, 1077 0, 1078 NULL, 1079 0, 1080 trailing, 1081 trailingLength, 1082 minimizedLocaleID, 1083 minimizedLocaleIDCapacity, 1084 err); 1085 } 1086 } 1087 1088 /** 1089 * Next, try the language and region. 1090 **/ 1091 if (regionLength > 0) { 1092 1093 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1094 1095 const int32_t tagBufferLength = 1096 createLikelySubtagsString( 1097 lang, 1098 langLength, 1099 NULL, 1100 0, 1101 region, 1102 regionLength, 1103 NULL, 1104 0, 1105 tagBuffer, 1106 sizeof(tagBuffer), 1107 err); 1108 1109 if(U_FAILURE(*err)) { 1110 goto error; 1111 } 1112 else if (uprv_strnicmp( 1113 maximizedTagBuffer, 1114 tagBuffer, 1115 tagBufferLength) == 0) { 1116 1117 return createTagString( 1118 lang, 1119 langLength, 1120 NULL, 1121 0, 1122 region, 1123 regionLength, 1124 trailing, 1125 trailingLength, 1126 minimizedLocaleID, 1127 minimizedLocaleIDCapacity, 1128 err); 1129 } 1130 } 1131 1132 /** 1133 * Finally, try the language and script. This is our last chance, 1134 * since trying with all three subtags would only yield the 1135 * maximal version that we already have. 1136 **/ 1137 if (scriptLength > 0 && regionLength > 0) { 1138 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1139 1140 const int32_t tagBufferLength = 1141 createLikelySubtagsString( 1142 lang, 1143 langLength, 1144 script, 1145 scriptLength, 1146 NULL, 1147 0, 1148 NULL, 1149 0, 1150 tagBuffer, 1151 sizeof(tagBuffer), 1152 err); 1153 1154 if(U_FAILURE(*err)) { 1155 goto error; 1156 } 1157 else if (uprv_strnicmp( 1158 maximizedTagBuffer, 1159 tagBuffer, 1160 tagBufferLength) == 0) { 1161 1162 return createTagString( 1163 lang, 1164 langLength, 1165 script, 1166 scriptLength, 1167 NULL, 1168 0, 1169 trailing, 1170 trailingLength, 1171 minimizedLocaleID, 1172 minimizedLocaleIDCapacity, 1173 err); 1174 } 1175 } 1176 1177 { 1178 /** 1179 * If we got here, return the locale ID parameter. 1180 **/ 1181 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); 1182 1183 uprv_memcpy( 1184 minimizedLocaleID, 1185 localeID, 1186 localeIDLength <= minimizedLocaleIDCapacity ? 1187 localeIDLength : minimizedLocaleIDCapacity); 1188 1189 return u_terminateChars( 1190 minimizedLocaleID, 1191 minimizedLocaleIDCapacity, 1192 localeIDLength, 1193 err); 1194 } 1195 1196error: 1197 1198 if (!U_FAILURE(*err)) { 1199 *err = U_ILLEGAL_ARGUMENT_ERROR; 1200 } 1201 1202 return -1; 1203 1204 1205} 1206 1207static UBool 1208do_canonicalize(const char* localeID, 1209 char* buffer, 1210 int32_t bufferCapacity, 1211 UErrorCode* err) 1212{ 1213 uloc_canonicalize( 1214 localeID, 1215 buffer, 1216 bufferCapacity, 1217 err); 1218 1219 if (*err == U_STRING_NOT_TERMINATED_WARNING || 1220 *err == U_BUFFER_OVERFLOW_ERROR) { 1221 *err = U_ILLEGAL_ARGUMENT_ERROR; 1222 1223 return FALSE; 1224 } 1225 else if (U_FAILURE(*err)) { 1226 1227 return FALSE; 1228 } 1229 else { 1230 return TRUE; 1231 } 1232} 1233 1234U_CAPI int32_t U_EXPORT2 1235uloc_addLikelySubtags(const char* localeID, 1236 char* maximizedLocaleID, 1237 int32_t maximizedLocaleIDCapacity, 1238 UErrorCode* err) 1239{ 1240 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1241 1242 if (!do_canonicalize( 1243 localeID, 1244 localeBuffer, 1245 sizeof(localeBuffer), 1246 err)) { 1247 return -1; 1248 } 1249 else { 1250 return _uloc_addLikelySubtags( 1251 localeBuffer, 1252 maximizedLocaleID, 1253 maximizedLocaleIDCapacity, 1254 err); 1255 } 1256} 1257 1258U_CAPI int32_t U_EXPORT2 1259uloc_minimizeSubtags(const char* localeID, 1260 char* minimizedLocaleID, 1261 int32_t minimizedLocaleIDCapacity, 1262 UErrorCode* err) 1263{ 1264 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1265 1266 if (!do_canonicalize( 1267 localeID, 1268 localeBuffer, 1269 sizeof(localeBuffer), 1270 err)) { 1271 return -1; 1272 } 1273 else { 1274 return _uloc_minimizeSubtags( 1275 localeBuffer, 1276 minimizedLocaleID, 1277 minimizedLocaleIDCapacity, 1278 err); 1279 } 1280} 1281 1282// Pairs of (language subtag, + or -) for finding out fast if common languages 1283// are LTR (minus) or RTL (plus). 1284static const char LANG_DIR_STRING[] = 1285 "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; 1286 1287// Implemented here because this calls uloc_addLikelySubtags(). 1288U_CAPI UBool U_EXPORT2 1289uloc_isRightToLeft(const char *locale) { 1290 UErrorCode errorCode = U_ZERO_ERROR; 1291 char script[8]; 1292 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode); 1293 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || 1294 scriptLength == 0) { 1295 // Fastpath: We know the likely scripts and their writing direction 1296 // for some common languages. 1297 errorCode = U_ZERO_ERROR; 1298 char lang[8]; 1299 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); 1300 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || 1301 langLength == 0) { 1302 return FALSE; 1303 } 1304 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); 1305 if (langPtr != NULL) { 1306 switch (langPtr[langLength]) { 1307 case '-': return FALSE; 1308 case '+': return TRUE; 1309 default: break; // partial match of a longer code 1310 } 1311 } 1312 // Otherwise, find the likely script. 1313 errorCode = U_ZERO_ERROR; 1314 char likely[ULOC_FULLNAME_CAPACITY]; 1315 (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode); 1316 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { 1317 return FALSE; 1318 } 1319 scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode); 1320 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || 1321 scriptLength == 0) { 1322 return FALSE; 1323 } 1324 } 1325 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); 1326 return uscript_isRightToLeft(scriptCode); 1327} 1328 1329U_NAMESPACE_BEGIN 1330 1331UBool 1332Locale::isRightToLeft() const { 1333 return uloc_isRightToLeft(getBaseName()); 1334} 1335 1336U_NAMESPACE_END 1337 1338// The following must at least allow for rg key value (6) plus terminator (1). 1339#define ULOC_RG_BUFLEN 8 1340 1341U_CAPI int32_t U_EXPORT2 1342ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, 1343 char *region, int32_t regionCapacity, UErrorCode* status) { 1344 if (U_FAILURE(*status)) { 1345 return 0; 1346 } 1347 char rgBuf[ULOC_RG_BUFLEN]; 1348 UErrorCode rgStatus = U_ZERO_ERROR; 1349 1350 // First check for rg keyword value 1351 int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); 1352 if (U_FAILURE(rgStatus) || rgLen != 6) { 1353 rgLen = 0; 1354 } else { 1355 // rgBuf guaranteed to be zero terminated here, with text len 6 1356 char *rgPtr = rgBuf; 1357 for (; *rgPtr!= 0; rgPtr++) { 1358 *rgPtr = uprv_toupper(*rgPtr); 1359 } 1360 rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0; 1361 } 1362 1363 if (rgLen == 0) { 1364 // No valid rg keyword value, try for unicode_region_subtag 1365 rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status); 1366 if (U_FAILURE(*status)) { 1367 rgLen = 0; 1368 } else if (rgLen == 0 && inferRegion) { 1369 // no unicode_region_subtag but inferRegion TRUE, try likely subtags 1370 char locBuf[ULOC_FULLNAME_CAPACITY]; 1371 rgStatus = U_ZERO_ERROR; 1372 (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus); 1373 if (U_SUCCESS(rgStatus)) { 1374 rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status); 1375 if (U_FAILURE(*status)) { 1376 rgLen = 0; 1377 } 1378 } 1379 } 1380 } 1381 1382 rgBuf[rgLen] = 0; 1383 uprv_strncpy(region, rgBuf, regionCapacity); 1384 return u_terminateChars(region, regionCapacity, rgLen, status); 1385} 1386 1387