dng_string.cpp revision 29c7498fabe2e3c87a85b487bfe9d783c401e1f0
1/*****************************************************************************/ 2// Copyright 2006-2007 Adobe Systems Incorporated 3// All Rights Reserved. 4// 5// NOTICE: Adobe permits you to use, modify, and distribute this file in 6// accordance with the terms of the Adobe license agreement accompanying it. 7/*****************************************************************************/ 8 9/* $Id: //mondo/dng_sdk_1_4/dng_sdk/source/dng_string.cpp#2 $ */ 10/* $DateTime: 2012/07/31 22:04:34 $ */ 11/* $Change: 840853 $ */ 12/* $Author: tknoll $ */ 13 14/*****************************************************************************/ 15 16#include "dng_string.h" 17 18#include "dng_assertions.h" 19#include "dng_exceptions.h" 20#include "dng_flags.h" 21#include "dng_mutex.h" 22#include "dng_utils.h" 23#include "dng_safe_arithmetic.h" 24 25#if qMacOS 26#include <TargetConditionals.h> 27#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 28#include <MobileCoreServices/MobileCoreServices.h> 29#else 30#include <CoreServices/CoreServices.h> 31#endif // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 32#endif // qMacOS 33 34#if qWinOS 35#include <windows.h> 36#endif 37 38#if qiPhone || qAndroid || qLinux 39#include <ctype.h> // for isdigit 40#endif 41 42/*****************************************************************************/ 43 44const uint32 kREPLACEMENT_CHARACTER = 0x0000FFFD; 45 46/*****************************************************************************/ 47 48// Returns the length of the zero-terminated string 's'. Throws a dng_exception 49// if the length of 's' is too large to be represented as a uint32_t. 50static uint32 strlenAsUint32(const char *s) 51 { 52 53 uint32 lengthAsUint32 = 0; 54 ConvertUnsigned(strlen(s), &lengthAsUint32); 55 56 return lengthAsUint32; 57 58 } 59 60// Checks whether there is enough space left in the buffer pointed to by 61// 'currentPos' to write at least 'space' elements of type T (to positions 62// currentPos[0] through currentPos[space - 1]. Throws a dng_exception if there 63// is not enough space left in the buffer. 64// 'bufferEnd' should point one element beyond the end of the buffer. For 65// example, if the buffer is "T buffer[3];", then bufferEnd should point to 66// T + 3. 67template <class T> 68static void CheckSpaceLeftInBuffer(const T *currentPos, 69 const T *bufferEnd, 70 size_t space) 71 { 72 73 if (bufferEnd < currentPos || static_cast<size_t>(bufferEnd - currentPos) < space) 74 { 75 ThrowMemoryFull ("Buffer overrun"); 76 } 77 78 } 79 80/*****************************************************************************/ 81 82// Throws an exception to notify the user of code that has not been security 83// hardened and prevent execution of that code. 84// 85// Though the DNG SDK in general has been security-hardened, this does not apply 86// to the following Mac-OS- and Windows-specific functions. Calls to 87// ThrowNotHardened() have been added to these functions to alert callers of 88// this fact. 89static void ThrowNotHardened() 90 { 91 ThrowProgramError ("This function has not been security-hardened"); 92 } 93 94#if qMacOS 95#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 96 97static uint32 Extract_SystemEncoding (const dng_string &dngString, 98 dng_memory_data &buffer) 99 { 100 ThrowProgramError ("Extract_SystemEncoding() not implemented on iOS"); 101 return 0; 102 } 103 104static void Assign_SystemEncoding (dng_string &dngString, 105 const char *otherString) 106 { 107 ThrowProgramError ("Assign_SystemEncoding() not implemented on iOS"); 108 109 } 110 111static void Assign_JIS_X208_1990 (dng_string &dngString, 112 const char *otherString) 113 { 114 ThrowProgramError ("Assign_JIS_X208_1990() not implemented on iOS"); 115 } 116 117#else 118 119static void Assign_Multibyte (dng_string &dngString, 120 const char *otherString, 121 TextEncoding encoding) 122 { 123 124 // This function contains security-vulnerable code. Do not use. 125 // The particular vulnerabilities are: 126 // - Casting the result of strlen() to a uint32 may case truncation. (Use 127 // strlenAsUint32() instead.) 128 // - The computation of aBufSize and the subsequent addition of 1 in the 129 // call to the dng_memory_data constructor may wrap around. 130 ThrowNotHardened(); 131 132 uint32 aSize = (uint32) strlen (otherString); 133 134 if (aSize > 0) 135 { 136 137 uint32 aBufSize = aSize * 6 + 256; 138 139 dng_memory_data aBuf (aBufSize + 1); 140 141 UnicodeMapping aMapping; 142 143 aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0, 144 kUnicodeNoSubset, 145 kUnicodeUTF8Format); 146 147 aMapping.otherEncoding = encoding; 148 aMapping.mappingVersion = kUnicodeUseLatestMapping; 149 150 TextToUnicodeInfo aInfo = NULL; 151 152 if (::CreateTextToUnicodeInfo (&aMapping, &aInfo) == noErr) 153 { 154 155 ByteCount aInput = 0; 156 ByteCount aOutput = 0; 157 158 ::ConvertFromTextToUnicode (aInfo, 159 aSize, 160 otherString, 161 kUnicodeUseFallbacksMask | 162 kUnicodeLooseMappingsMask, 163 0, 164 NULL, 165 NULL, 166 NULL, 167 aBufSize, 168 &aInput, 169 &aOutput, 170 (UniChar *) aBuf.Buffer ()); 171 172 ::DisposeTextToUnicodeInfo (&aInfo); 173 174 if (aOutput > 0 && aOutput <= aBufSize) 175 { 176 177 char *aBufChar = aBuf.Buffer_char (); 178 179 aBufChar [aOutput] = 0; 180 181 dngString.Set (aBufChar); 182 183 return; 184 185 } 186 187 } 188 189 } 190 191 dngString.Clear (); 192 193 } 194 195static uint32 Extract_Multibyte (const dng_string &dngString, 196 dng_memory_data &buffer, 197 TextEncoding encoding) 198 { 199 200 // This function contains security-vulnerable code. Do not use. 201 // The particular vulnerabilities are: 202 // - The computation of aBufSize may wrap around. 203 // - The computation of the argument to buffer.Allocate() may overflow; the 204 // conversion to uint32 is also problematic. 205 // - The signed-to-unsigned conversion in the return statement " 206 // return (uint32) aOutput;" may be problematic. 207 ThrowNotHardened(); 208 209 uint32 aSize = dngString.Length (); 210 211 if (aSize > 0) 212 { 213 214 uint32 aBufSize = (aSize * 2) + 256; 215 216 dng_memory_data tempBuffer (aBufSize); 217 218 UnicodeMapping aMapping; 219 220 aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0, 221 kUnicodeNoSubset, 222 kUnicodeUTF8Format); 223 224 aMapping.otherEncoding = encoding; 225 aMapping.mappingVersion = kUnicodeUseLatestMapping; 226 227 UnicodeToTextInfo aInfo = NULL; 228 229 if (::CreateUnicodeToTextInfo (&aMapping, &aInfo) == noErr) 230 { 231 232 ByteCount aInput = 0; 233 ByteCount aOutput = 0; 234 235 ::ConvertFromUnicodeToText (aInfo, 236 aSize, 237 (const UniChar *) dngString.Get (), 238 kUnicodeUseFallbacksMask | 239 kUnicodeLooseMappingsMask | 240 kUnicodeDefaultDirectionMask, 241 0, 242 NULL, 243 NULL, 244 NULL, 245 aBufSize, 246 &aInput, 247 &aOutput, 248 tempBuffer.Buffer_char ()); 249 250 ::DisposeUnicodeToTextInfo (&aInfo); 251 252 if (aOutput > 0) 253 { 254 255 buffer.Allocate ((uint32) (aOutput + 1)); 256 257 memcpy (buffer.Buffer (), 258 tempBuffer.Buffer (), 259 aOutput); 260 261 buffer.Buffer_char () [aOutput] = 0; 262 263 return (uint32) aOutput; 264 265 } 266 267 } 268 269 } 270 271 buffer.Allocate (1); 272 273 buffer.Buffer_char () [0] = 0; 274 275 return 0; 276 277 } 278 279static void Assign_SystemEncoding (dng_string &dngString, 280 const char *otherString) 281 { 282 283 TextEncoding aEncoding; 284 285 ::UpgradeScriptInfoToTextEncoding (smSystemScript, 286 kTextLanguageDontCare, 287 kTextRegionDontCare, 288 NULL, 289 &aEncoding); 290 291 Assign_Multibyte (dngString, 292 otherString, 293 aEncoding); 294 295 } 296 297static uint32 Extract_SystemEncoding (const dng_string &dngString, 298 dng_memory_data &buffer) 299 { 300 301 TextEncoding aEncoding; 302 303 ::UpgradeScriptInfoToTextEncoding (smSystemScript, 304 kTextLanguageDontCare, 305 kTextRegionDontCare, 306 NULL, 307 &aEncoding); 308 309 return Extract_Multibyte (dngString, 310 buffer, 311 aEncoding); 312 313 } 314 315static void Assign_JIS_X208_1990 (dng_string &dngString, 316 const char *otherString) 317 { 318 319 Assign_Multibyte (dngString, 320 otherString, 321 kTextEncodingJIS_X0208_90); 322 323 } 324 325#endif // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 326#endif // qMacOS 327 328/*****************************************************************************/ 329 330#if qWinOS 331 332static void Assign_Multibyte (dng_string &dngString, 333 const char *otherString, 334 UINT encoding) 335 { 336 337 // This function contains security-vulnerable code. Do not use. 338 // The particular vulnerabilities are: 339 // - Converting the return value of strlen() to int may cause overflow. 340 // - The computation of aBufChars and of the argument to the dng_memory_data 341 // constructor may overflow. Additionally, there is an implicit 342 // signed-to-unsigned conversion in the call to the dng_memory_data 343 // constructor. 344 ThrowNotHardened(); 345 346 DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes"); 347 348 int aSize = (int) strlen (otherString); 349 350 if (aSize > 0) 351 { 352 353 int aBufChars = aSize * 3 + 128; 354 355 dng_memory_data aBuf ((aBufChars + 1) << 1); 356 357 int aResult = ::MultiByteToWideChar (encoding, 358 0, 359 otherString, 360 aSize, 361 (WCHAR *) aBuf.Buffer (), 362 aBufChars); 363 364 if (aResult > 0 && aResult <= aBufChars) 365 { 366 367 uint16 * aUTF16 = aBuf.Buffer_uint16 (); 368 369 aUTF16 [aResult] = 0; 370 371 dngString.Set_UTF16 (aUTF16); 372 373 return; 374 375 } 376 377 } 378 379 dngString.Clear (); 380 381 } 382 383static uint32 Extract_Multibyte (const dng_string &dngString, 384 dng_memory_data &buffer, 385 UINT encoding) 386 { 387 388 // This function contains security-vulnerable code. Do not use. 389 // The particular vulnerabilities are: 390 // - Converting the return value of dngString.Get_UTF16() may cause 391 // overflow. 392 // - The computation of dBufSize may overflow. 393 // - The calls to the dng_memory_data constructor and to buffer.Allocate() 394 // trigger implicit conversions of int to uint32 that may be problematic. 395 // - The memcpy() call triggers an implicit conversion of aResult to a 396 // size_t, which may be problematic. 397 // - The conversion of aResult to a uint32 in the return statement may be 398 // problematic. 399 ThrowNotHardened(); 400 401 DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes"); 402 403 dng_memory_data sBuffer; 404 405 int aCount = dngString.Get_UTF16 (sBuffer); 406 407 int dBufSize = aCount * 2 + 256; 408 409 dng_memory_data dBuffer (dBufSize); 410 411 int aResult = ::WideCharToMultiByte (encoding, 412 0, 413 (WCHAR *) sBuffer.Buffer (), 414 aCount, 415 dBuffer.Buffer_char (), 416 dBufSize, 417 NULL, 418 NULL); 419 420 if (aResult < 0) 421 aResult = 0; 422 423 buffer.Allocate (aResult + 1); 424 425 memcpy (buffer.Buffer (), 426 dBuffer.Buffer (), 427 aResult); 428 429 buffer.Buffer_char () [aResult] = 0; 430 431 return (uint32) aResult; 432 433 } 434 435static void Assign_SystemEncoding (dng_string &dngString, 436 const char *otherString) 437 { 438 439 Assign_Multibyte (dngString, 440 otherString, 441 ::GetACP ()); 442 443 } 444 445static uint32 Extract_SystemEncoding (const dng_string &dngString, 446 dng_memory_data &buffer) 447 { 448 449 return Extract_Multibyte (dngString, 450 buffer, 451 ::GetACP ()); 452 453 } 454 455static void Assign_JIS_X208_1990 (dng_string &dngString, 456 const char *otherString) 457 { 458 459 // From MSDN documentation: 20932 = JIS X 0208-1990 & 0121-1990 460 461 const UINT kJIS = 20932; 462 463 Assign_Multibyte (dngString, 464 otherString, 465 kJIS); 466 467 } 468 469#endif 470 471/*****************************************************************************/ 472 473static bool IsASCII (const char *s) 474 { 475 476 if (!s) 477 { 478 479 return true; 480 481 } 482 483 while (true) 484 { 485 486 uint8 c = (uint8) *(s++); 487 488 if (c == 0) 489 { 490 491 break; 492 493 } 494 495 if (c & 0x80) 496 { 497 498 return false; 499 500 } 501 502 } 503 504 return true; 505 506 } 507 508/*****************************************************************************/ 509 510dng_string::dng_string () 511 512 : fData () 513 514 { 515 516 } 517 518/*****************************************************************************/ 519 520dng_string::dng_string (const dng_string &s) 521 522 : fData () 523 524 { 525 526 Set (s.Get ()); 527 528 } 529 530/*****************************************************************************/ 531 532dng_string & dng_string::operator= (const dng_string &s) 533 { 534 535 if (this != &s) 536 { 537 538 Set (s.Get ()); 539 540 } 541 542 return *this; 543 544 } 545 546/*****************************************************************************/ 547 548dng_string::~dng_string () 549 { 550 551 } 552 553/*****************************************************************************/ 554 555const char * dng_string::Get () const 556 { 557 558 if (fData.Buffer ()) 559 { 560 561 return fData.Buffer_char (); 562 563 } 564 565 return ""; 566 567 } 568 569/*****************************************************************************/ 570 571bool dng_string::IsASCII () const 572 { 573 574 return ::IsASCII (Get ()); 575 576 } 577 578/*****************************************************************************/ 579 580void dng_string::Set (const char *s) 581 { 582 583 // Measure the new length. 584 585 uint32 newLen = (s != NULL ? strlenAsUint32 (s) : 0); 586 587 // If it is a NULL string, then clear the buffer. 588 589 if (newLen == 0) 590 { 591 592 fData.Clear (); 593 594 } 595 596 // Else we need to copy the bytes. 597 598 else 599 { 600 601 uint32 oldLen = Length (); 602 603 // We might be setting this string to a sub-string of itself, 604 // so don't reallocate the data unless the string is getting 605 // longer. 606 607 if (newLen > oldLen) 608 { 609 610 fData.Clear (); 611 612 fData.Allocate (SafeUint32Add (newLen, 1)); 613 614 } 615 616 char *d = fData.Buffer_char (); 617 618 for (uint32 k = 0; k <= newLen; k++) 619 { 620 621 d [k] = s [k]; 622 623 } 624 625 } 626 627 } 628 629/*****************************************************************************/ 630 631void dng_string::Set_ASCII (const char *s) 632 { 633 634 if (::IsASCII (s)) 635 { 636 637 Set (s); 638 639 } 640 641 else 642 { 643 644 Set_SystemEncoding (s); 645 646 } 647 648 } 649 650/*****************************************************************************/ 651 652void dng_string::Set_UTF8 (const char *s) 653 { 654 655 uint32 len = strlenAsUint32 (s); 656 657 const char *sEnd = s + len; 658 659 // Worst case expansion is 1-byte characters expanding to 660 // replacement character, which requires 3 bytes. 661 662 const uint32 destBufferLength = SafeUint32Add (SafeUint32Mult (len, 3), 1); 663 dng_memory_data buffer (destBufferLength); 664 665 uint8 *d = buffer.Buffer_uint8 (); 666 uint8 * const destEnd = d + destBufferLength; 667 668 while (s < sEnd) 669 { 670 671 uint32 aChar = DecodeUTF8 (s, (uint32) (sEnd - s)); 672 673 if (aChar > 0x7FFFFFFF) 674 { 675 aChar = kREPLACEMENT_CHARACTER; 676 } 677 678 #if qDNGValidate 679 680 if (aChar == kREPLACEMENT_CHARACTER) 681 { 682 ReportWarning ("Expected UTF-8 value is not valid UTF-8 (or contains a kREPLACEMENT_CHARACTER)"); 683 } 684 685 #endif 686 687 if (aChar < 0x00000080) 688 { 689 CheckSpaceLeftInBuffer (d, destEnd, 1); 690 *(d++) = (uint8) aChar; 691 } 692 693 else if (aChar < 0x00000800) 694 { 695 CheckSpaceLeftInBuffer (d, destEnd, 2); 696 *(d++) = (uint8) ((aChar >> 6) | 0x000000C0); 697 *(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080); 698 } 699 700 else if (aChar < 0x00010000) 701 { 702 CheckSpaceLeftInBuffer (d, destEnd, 3); 703 *(d++) = (uint8) ( (aChar >> 12) | 0x000000E0); 704 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 705 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 706 } 707 708 else if (aChar < 0x00200000) 709 { 710 CheckSpaceLeftInBuffer (d, destEnd, 4); 711 *(d++) = (uint8) ( (aChar >> 18) | 0x000000F0); 712 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 713 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 714 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 715 } 716 717 else if (aChar < 0x04000000) 718 { 719 CheckSpaceLeftInBuffer (d, destEnd, 5); 720 *(d++) = (uint8) ( (aChar >> 24) | 0x000000F8); 721 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 722 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 723 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 724 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 725 } 726 727 else 728 { 729 CheckSpaceLeftInBuffer (d, destEnd, 6); 730 *(d++) = (uint8) ( (aChar >> 30) | 0x000000FC); 731 *(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080); 732 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 733 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 734 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 735 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 736 } 737 738 } 739 740 CheckSpaceLeftInBuffer (d, destEnd, 1); 741 *d = 0; 742 743 Set (buffer.Buffer_char ()); 744 745 } 746 747/*****************************************************************************/ 748 749uint32 dng_string::Get_SystemEncoding (dng_memory_data &buffer) const 750 { 751 752 if (IsASCII ()) 753 { 754 755 uint32 len = Length (); 756 757 const uint32 destBufferLength = SafeUint32Add (len, 1); 758 buffer.Allocate (destBufferLength); 759 760 memcpy (buffer.Buffer (), Get (), destBufferLength); 761 762 return len; 763 764 } 765 766 else 767 { 768 769 #if qMacOS || qWinOS 770 771 return Extract_SystemEncoding (*this, buffer); 772 773 #else 774 775 // Fallback logic to force the string to ASCII. 776 777 dng_string temp (*this); 778 779 temp.ForceASCII (); 780 781 return temp.Get_SystemEncoding (buffer); 782 783 #endif 784 785 } 786 787 } 788 789/*****************************************************************************/ 790 791void dng_string::Set_SystemEncoding (const char *s) 792 { 793 794 if (::IsASCII (s)) 795 { 796 797 Set (s); 798 799 } 800 801 else 802 { 803 804 #if qMacOS || qWinOS 805 806 Assign_SystemEncoding (*this, s); 807 808 #else 809 810 // Fallback logic that just grabs the ASCII characters and 811 // ignores the non-ASCII characters. 812 813 uint32 len = strlenAsUint32 (s); 814 815 const uint32 destBufferLength = SafeUint32Add (len, 1); 816 dng_memory_data buffer (destBufferLength); 817 818 uint8 *d = buffer.Buffer_uint8 (); 819 uint8 * const destEnd = d + destBufferLength; 820 821 while (*s) 822 { 823 824 uint8 c = (uint8) *(s++); 825 826 if ((c & 0x80) == 0) 827 { 828 829 CheckSpaceLeftInBuffer (d, destEnd, 1); 830 *(d++) = c; 831 832 } 833 834 } 835 836 CheckSpaceLeftInBuffer (d, destEnd, 1); 837 *d = 0; 838 839 Set (buffer.Buffer_char ()); 840 841 #endif 842 843 } 844 845 } 846 847/*****************************************************************************/ 848 849bool dng_string::ValidSystemEncoding () const 850 { 851 852 if (IsASCII ()) 853 { 854 855 return true; 856 857 } 858 859 dng_memory_data buffer; 860 861 Get_SystemEncoding (buffer); 862 863 dng_string temp; 864 865 temp.Set_SystemEncoding (buffer.Buffer_char ()); 866 867 return (*this == temp); 868 869 } 870 871/*****************************************************************************/ 872 873void dng_string::Set_JIS_X208_1990 (const char *s) 874 { 875 876 if (::IsASCII (s)) 877 { 878 879 Set (s); 880 881 } 882 883 else 884 { 885 886 #if qMacOS || qWinOS 887 888 Assign_JIS_X208_1990 (*this, s); 889 890 #else 891 892 // Fallback to the ASCII extraction logic. 893 894 Set_SystemEncoding (s); 895 896 #endif 897 898 } 899 900 } 901 902/*****************************************************************************/ 903 904#if defined(__clang__) && defined(__has_attribute) 905#if __has_attribute(no_sanitize) 906__attribute__((no_sanitize("unsigned-integer-overflow"))) 907#endif 908#endif 909uint32 dng_string::DecodeUTF8 (const char *&s, 910 uint32 maxBytes, 911 bool *isValid) 912 { 913 914 static const uint8 gUTF8Bytes [256] = 915 { 916 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 917 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 918 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 919 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 920 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 921 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 922 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 923 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6 924 }; 925 926 if (isValid) 927 { 928 *isValid = true; 929 } 930 931 const uint8 *nBuf = (const uint8 *) s; 932 933 uint32 aChar = nBuf [0]; 934 935 uint32 aSize = gUTF8Bytes [aChar]; 936 937 if (aSize > maxBytes) 938 { 939 940 s += maxBytes; 941 942 if (isValid) 943 { 944 *isValid = false; 945 } 946 947 return kREPLACEMENT_CHARACTER; 948 949 } 950 951 s += aSize; 952 953 for (uint32 extra = 1; extra < aSize; extra++) 954 { 955 956 if ((nBuf [extra] & 0xC0) != 0x80) 957 { 958 959 if (isValid) 960 { 961 *isValid = false; 962 } 963 964 return kREPLACEMENT_CHARACTER; 965 966 } 967 968 } 969 970 switch (aSize) 971 { 972 973 case 0: 974 { 975 976 s++; // Don't get stuck in infinite loop 977 978 if (isValid) 979 { 980 *isValid = false; 981 } 982 983 return kREPLACEMENT_CHARACTER; 984 985 } 986 987 case 1: 988 { 989 990 return aChar; 991 992 } 993 994 case 2: 995 { 996 997 aChar = ((aChar << 6) + nBuf [1]) - (uint32) 0x00003080UL; 998 999 break; 1000 1001 } 1002 1003 case 3: 1004 { 1005 1006 aChar = ((((aChar << 6) + nBuf [1]) 1007 << 6) + nBuf [2]) - (uint32) 0x000E2080UL; 1008 1009 break; 1010 1011 } 1012 1013 case 4: 1014 { 1015 1016 aChar = ((((((aChar << 6) + nBuf [1]) 1017 << 6) + nBuf [2]) 1018 << 6) + nBuf [3]) - (uint32) 0x03C82080UL; 1019 1020 break; 1021 1022 } 1023 1024 case 5: 1025 { 1026 1027 aChar = ((((((((aChar << 6) + nBuf [1]) 1028 << 6) + nBuf [2]) 1029 << 6) + nBuf [3]) 1030 << 6) + nBuf [4]) - (uint32) 0xFA082080UL; 1031 1032 break; 1033 1034 } 1035 1036 case 6: 1037 { 1038 1039 aChar = ((((((((((aChar << 6) + nBuf [1]) 1040 << 6) + nBuf [2]) 1041 << 6) + nBuf [3]) 1042 << 6) + nBuf [4]) 1043 << 6) + nBuf [5]) - (uint32) 0x82082080UL; 1044 1045 break; 1046 1047 } 1048 1049 } 1050 1051 if (aChar < 0x7F || aChar > 0x0010FFFF) 1052 { 1053 1054 if (isValid) 1055 { 1056 *isValid = false; 1057 } 1058 1059 return kREPLACEMENT_CHARACTER; 1060 1061 } 1062 1063 return aChar; 1064 1065 } 1066 1067/*****************************************************************************/ 1068 1069bool dng_string::IsUTF8 (const char *s) 1070 { 1071 1072 uint32 len = strlenAsUint32 (s); 1073 1074 const char *sEnd = s + len; 1075 1076 while (s < sEnd) 1077 { 1078 1079 bool isValid = true; 1080 1081 (void) DecodeUTF8 (s, (uint32) (sEnd - s), &isValid); 1082 1083 if (!isValid) 1084 { 1085 return false; 1086 } 1087 1088 } 1089 1090 return true; 1091 1092 } 1093 1094/*****************************************************************************/ 1095 1096void dng_string::Set_UTF8_or_System (const char *s) 1097 { 1098 1099 if (::IsASCII (s)) 1100 { 1101 1102 Set (s); 1103 1104 } 1105 1106 else if (IsUTF8 (s)) 1107 { 1108 1109 Set_UTF8 (s); 1110 1111 } 1112 1113 else 1114 { 1115 1116 Set_SystemEncoding (s); 1117 1118 } 1119 1120 } 1121 1122/*****************************************************************************/ 1123 1124uint32 dng_string::Get_UTF16 (dng_memory_data &buffer) const 1125 { 1126 1127 uint32 count = 0; 1128 1129 const char *sPtr = Get (); 1130 1131 while (*sPtr) 1132 { 1133 1134 uint32 x = DecodeUTF8 (sPtr); 1135 1136 if (x <= 0x0000FFFF || 1137 x > 0x0010FFFF) 1138 { 1139 1140 count = SafeUint32Add (count, 1); 1141 1142 } 1143 1144 else 1145 { 1146 1147 count = SafeUint32Add (count, 2); 1148 1149 } 1150 1151 } 1152 1153 const uint32 destBufferLength = SafeUint32Add (count, 1); 1154 buffer.Allocate (destBufferLength, sizeof (uint16)); 1155 1156 uint16 *dPtr = buffer.Buffer_uint16 (); 1157 uint16 * const destEnd = dPtr + destBufferLength; 1158 1159 sPtr = Get (); 1160 1161 while (*sPtr) 1162 { 1163 1164 uint32 x = DecodeUTF8 (sPtr); 1165 1166 if (x <= 0x0000FFFF) 1167 { 1168 1169 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1170 *(dPtr++) = (uint16) x; 1171 1172 } 1173 1174 else if (x > 0x0010FFFF) 1175 { 1176 1177 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1178 *(dPtr++) = (uint16) kREPLACEMENT_CHARACTER; 1179 1180 } 1181 1182 else 1183 { 1184 1185 x -= 0x00010000; 1186 1187 CheckSpaceLeftInBuffer (dPtr, destEnd, 2); 1188 *(dPtr++) = (uint16) ((x >> 10 ) + 0x0000D800); 1189 *(dPtr++) = (uint16) ((x & 0x000003FF) + 0x0000DC00); 1190 1191 } 1192 1193 } 1194 1195 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1196 *dPtr = 0; 1197 1198 return count; 1199 1200 } 1201 1202/*****************************************************************************/ 1203 1204void dng_string::Set_UTF16 (const uint16 *s) 1205 { 1206 1207 if (!s) 1208 { 1209 Clear (); 1210 return; 1211 } 1212 1213 bool swap = false; 1214 1215 if (s [0] == 0xFFFE) // Swapped byte order marker 1216 { 1217 swap = true; 1218 s++; 1219 } 1220 1221 else if (s [0] == 0xFEFF) // Non-swapped byte order marker 1222 { 1223 s++; 1224 } 1225 1226 uint32 length16 = 0; 1227 1228 while (s [length16] != 0) 1229 { 1230 length16 = SafeUint32Add (length16, 1); 1231 } 1232 1233 const uint16 *sEnd = s + length16; 1234 1235 const uint32 destBufferSize = 1236 SafeUint32Add (SafeUint32Mult (length16, 6), 1); 1237 dng_memory_data buffer (destBufferSize); 1238 1239 uint8 *d = buffer.Buffer_uint8 (); 1240 uint8 * const destEnd = d + destBufferSize; 1241 1242 while (s < sEnd) 1243 { 1244 1245 uint32 aChar = *s++; 1246 1247 if (swap) 1248 { 1249 aChar = ((aChar << 8) | (aChar >> 8)) & 0x0000FFFF; 1250 } 1251 1252 if ((aChar >= 0x0000D800) && (aChar <= 0x0000DBFF) && (s < sEnd)) 1253 { 1254 1255 uint32 aLow = *s; 1256 1257 if (swap) 1258 { 1259 aLow = ((aLow << 8) | (aLow >> 8)) & 0x0000FFFF; 1260 } 1261 1262 if ((aLow >= 0x0000DC00) && (aLow <= 0x0000DFFF)) 1263 { 1264 1265 aChar = ((aChar - 0x0000D800) << 10) + 1266 (aLow - 0x0000DC00) + 1267 0x00010000; 1268 1269 s++; 1270 1271 } 1272 1273 } 1274 1275 if (aChar > 0x7FFFFFFF) 1276 { 1277 aChar = kREPLACEMENT_CHARACTER; 1278 } 1279 1280 if (aChar < 0x00000080) 1281 { 1282 CheckSpaceLeftInBuffer (d, destEnd, 1); 1283 *(d++) = (uint8) aChar; 1284 } 1285 1286 else if (aChar < 0x00000800) 1287 { 1288 CheckSpaceLeftInBuffer (d, destEnd, 2); 1289 *(d++) = (uint8) ((aChar >> 6) | 0x000000C0); 1290 *(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080); 1291 } 1292 1293 else if (aChar < 0x00010000) 1294 { 1295 CheckSpaceLeftInBuffer (d, destEnd, 3); 1296 *(d++) = (uint8) ( (aChar >> 12) | 0x000000E0); 1297 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1298 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1299 } 1300 1301 else if (aChar < 0x00200000) 1302 { 1303 CheckSpaceLeftInBuffer (d, destEnd, 4); 1304 *(d++) = (uint8) ( (aChar >> 18) | 0x000000F0); 1305 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1306 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1307 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1308 } 1309 1310 else if (aChar < 0x04000000) 1311 { 1312 CheckSpaceLeftInBuffer (d, destEnd, 5); 1313 *(d++) = (uint8) ( (aChar >> 24) | 0x000000F8); 1314 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 1315 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1316 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1317 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1318 } 1319 1320 else 1321 { 1322 CheckSpaceLeftInBuffer (d, destEnd, 6); 1323 *(d++) = (uint8) ( (aChar >> 30) | 0x000000FC); 1324 *(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080); 1325 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 1326 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1327 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1328 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1329 } 1330 1331 } 1332 1333 CheckSpaceLeftInBuffer (d, destEnd, 1); 1334 *d = 0; 1335 1336 Set (buffer.Buffer_char ()); 1337 1338 } 1339 1340/*****************************************************************************/ 1341 1342void dng_string::Clear () 1343 { 1344 1345 Set (NULL); 1346 1347 } 1348 1349/*****************************************************************************/ 1350 1351void dng_string::Truncate (uint32 maxBytes) 1352 { 1353 1354 uint32 len = Length (); 1355 1356 if (len > maxBytes) 1357 { 1358 1359 uint8 *s = fData.Buffer_uint8 (); 1360 1361 // Don't truncate on an extension character. Extensions characters 1362 // in UTF-8 have the 0x80 bit set and the 0x40 bit clear. 1363 1364 while (maxBytes > 0 && ((s [maxBytes]) & 0xC0) == 0x80) 1365 { 1366 1367 maxBytes--; 1368 1369 } 1370 1371 s [maxBytes] = 0; 1372 1373 } 1374 1375 } 1376 1377/*****************************************************************************/ 1378 1379bool dng_string::TrimTrailingBlanks () 1380 { 1381 1382 bool didTrim = false; 1383 1384 if (fData.Buffer ()) 1385 { 1386 1387 char *s = fData.Buffer_char (); 1388 1389 uint32 len = strlenAsUint32 (s); 1390 1391 while (len > 0 && s [len - 1] == ' ') 1392 { 1393 len--; 1394 didTrim = true; 1395 } 1396 1397 s [len] = 0; 1398 1399 } 1400 1401 return didTrim; 1402 1403 } 1404 1405/*****************************************************************************/ 1406 1407bool dng_string::TrimLeadingBlanks () 1408 { 1409 1410 bool didTrim = false; 1411 1412 const char *s = Get (); 1413 1414 while (*s == ' ') 1415 { 1416 s++; 1417 didTrim = true; 1418 } 1419 1420 if (didTrim) 1421 { 1422 Set (s); 1423 } 1424 1425 return didTrim; 1426 1427 } 1428 1429/*****************************************************************************/ 1430 1431bool dng_string::IsEmpty () const 1432 { 1433 1434 const char *s = Get (); 1435 1436 return *s == 0; 1437 1438 } 1439 1440/*****************************************************************************/ 1441 1442uint32 dng_string::Length () const 1443 { 1444 1445 const char *s = Get (); 1446 1447 return strlenAsUint32 (s); 1448 1449 } 1450 1451/*****************************************************************************/ 1452 1453bool dng_string::operator== (const dng_string &s) const 1454 { 1455 1456 const char *s1 = Get (); 1457 const char *s2 = s.Get (); 1458 1459 return strcmp (s1, s2) == 0; 1460 1461 } 1462 1463/*****************************************************************************/ 1464 1465bool dng_string::Matches (const char *t, 1466 const char *s, 1467 bool case_sensitive) 1468 { 1469 1470 while (*s != 0) 1471 { 1472 1473 char c1 = *(s++); 1474 char c2 = *(t++); 1475 1476 if (!case_sensitive) 1477 { 1478 c1 = ForceUppercase (c1); 1479 c2 = ForceUppercase (c2); 1480 } 1481 1482 if (c1 != c2) 1483 { 1484 return false; 1485 } 1486 1487 } 1488 1489 return (*t == 0); 1490 1491 } 1492 1493/*****************************************************************************/ 1494 1495bool dng_string::Matches (const char *s, 1496 bool case_sensitive) const 1497 { 1498 1499 return dng_string::Matches (Get (), s, case_sensitive); 1500 1501 } 1502 1503/*****************************************************************************/ 1504 1505bool dng_string::StartsWith (const char *s, 1506 bool case_sensitive) const 1507 { 1508 1509 const char *t = Get (); 1510 1511 while (*s != 0) 1512 { 1513 1514 char c1 = *(s++); 1515 char c2 = *(t++); 1516 1517 if (!case_sensitive) 1518 { 1519 c1 = ForceUppercase (c1); 1520 c2 = ForceUppercase (c2); 1521 } 1522 1523 if (c1 != c2) 1524 { 1525 return false; 1526 } 1527 1528 } 1529 1530 return true; 1531 1532 } 1533 1534/*****************************************************************************/ 1535 1536bool dng_string::EndsWith (const char *s, 1537 bool case_sensitive) const 1538 { 1539 1540 uint32 len1 = Length (); 1541 1542 uint32 len2 = strlenAsUint32 (s); 1543 1544 if (len1 < len2) 1545 { 1546 return false; 1547 } 1548 1549 const char *t = Get () + (len1 - len2); 1550 1551 while (*s != 0) 1552 { 1553 1554 char c1 = *(s++); 1555 char c2 = *(t++); 1556 1557 if (!case_sensitive) 1558 { 1559 c1 = ForceUppercase (c1); 1560 c2 = ForceUppercase (c2); 1561 } 1562 1563 if (c1 != c2) 1564 { 1565 return false; 1566 } 1567 1568 } 1569 1570 return true; 1571 1572 } 1573 1574/*****************************************************************************/ 1575 1576bool dng_string::Contains (const char *s, 1577 bool case_sensitive, 1578 int32 *match_offset) const 1579 { 1580 1581 if (match_offset) 1582 { 1583 *match_offset = -1; 1584 } 1585 1586 uint32 len1 = Length (); 1587 1588 uint32 len2 = strlenAsUint32 (s); 1589 1590 if (len1 < len2) 1591 { 1592 return false; 1593 } 1594 1595 uint32 offsets = len1 - len2; 1596 1597 for (uint32 offset = 0; offset <= offsets; offset++) 1598 { 1599 1600 const char *ss = s; 1601 const char *tt = Get () + offset; 1602 1603 while (*ss != 0) 1604 { 1605 1606 char c1 = *(ss++); 1607 char c2 = *(tt++); 1608 1609 if (!case_sensitive) 1610 { 1611 c1 = ForceUppercase (c1); 1612 c2 = ForceUppercase (c2); 1613 } 1614 1615 if (c1 != c2) 1616 { 1617 goto tryNextOffset; 1618 } 1619 1620 } 1621 1622 if (match_offset) 1623 { 1624 *match_offset = offset; 1625 } 1626 1627 return true; 1628 1629 tryNextOffset: ; 1630 1631 } 1632 1633 return false; 1634 1635 } 1636 1637/*****************************************************************************/ 1638 1639bool dng_string::Replace (const char *old_string, 1640 const char *new_string, 1641 bool case_sensitive) 1642 { 1643 1644 int32 match_offset = -1; 1645 1646 if (Contains (old_string, 1647 case_sensitive, 1648 &match_offset)) 1649 { 1650 1651 uint32 len1 = Length (); 1652 1653 uint32 len2 = strlenAsUint32 (old_string); 1654 uint32 len3 = strlenAsUint32 (new_string); 1655 1656 if (len2 == len3) 1657 { 1658 1659 strncpy (fData.Buffer_char () + match_offset, 1660 new_string, 1661 len3); 1662 1663 } 1664 1665 else if (len2 > len3) 1666 { 1667 1668 strncpy (fData.Buffer_char () + match_offset, 1669 new_string, 1670 len3); 1671 1672 const char *s = fData.Buffer_char () + match_offset + len2; 1673 char *d = fData.Buffer_char () + match_offset + len3; 1674 1675 uint32 extra = len1 - match_offset - len2 + 1; // + 1 for NULL termination 1676 1677 for (uint32 j = 0; j < extra; j++) 1678 { 1679 *(d++) = *(s++); 1680 } 1681 1682 } 1683 1684 else 1685 { 1686 1687 // "len1 - len2" cannot wrap around because we know that if this 1688 // string contains old_string, len1 >= len2 must hold. 1689 dng_memory_data tempBuffer ( 1690 SafeUint32Add (SafeUint32Add (len1 - len2, len3), 1)); 1691 1692 if (match_offset) 1693 { 1694 1695 strncpy (tempBuffer.Buffer_char (), 1696 fData .Buffer_char (), 1697 match_offset); 1698 1699 } 1700 1701 if (len3) 1702 { 1703 1704 strncpy (tempBuffer.Buffer_char () + match_offset, 1705 new_string, 1706 len3); 1707 1708 } 1709 1710 uint32 extra = len1 - match_offset - len2 + 1; // + 1 for NULL termination 1711 1712 strncpy (tempBuffer.Buffer_char () + match_offset + len3, 1713 fData .Buffer_char () + match_offset + len2, 1714 extra); 1715 1716 Set (tempBuffer.Buffer_char ()); 1717 1718 } 1719 1720 return true; 1721 1722 } 1723 1724 return false; 1725 1726 } 1727 1728/*****************************************************************************/ 1729 1730bool dng_string::TrimLeading (const char *s, 1731 bool case_sensitive) 1732 { 1733 1734 if (StartsWith (s, case_sensitive)) 1735 { 1736 1737 Set (Get () + strlenAsUint32 (s)); 1738 1739 return true; 1740 1741 } 1742 1743 return false; 1744 1745 } 1746 1747/*****************************************************************************/ 1748 1749void dng_string::Append (const char *s) 1750 { 1751 1752 uint32 len2 = strlenAsUint32 (s); 1753 1754 if (len2) 1755 { 1756 1757 uint32 len1 = Length (); 1758 1759 dng_memory_data temp (SafeUint32Add (SafeUint32Add (len1, len2), 1)); 1760 1761 char *buffer = temp.Buffer_char (); 1762 1763 if (len1) 1764 { 1765 memcpy (buffer, Get (), len1); 1766 } 1767 1768 memcpy (buffer + len1, s, len2 + 1); 1769 1770 Set (buffer); 1771 1772 } 1773 1774 } 1775 1776/*****************************************************************************/ 1777 1778void dng_string::SetUppercase () 1779 { 1780 1781 if (fData.Buffer ()) 1782 { 1783 1784 uint32 len = Length (); 1785 1786 char *dPtr = fData.Buffer_char (); 1787 1788 for (uint32 j = 0; j < len; j++) 1789 { 1790 1791 char c = dPtr [j]; 1792 1793 if (c >= 'a' && c <= 'z') 1794 { 1795 1796 dPtr [j] = c - 'a' + 'A'; 1797 1798 } 1799 1800 } 1801 1802 } 1803 1804 } 1805 1806/*****************************************************************************/ 1807 1808void dng_string::SetLowercase () 1809 { 1810 1811 if (fData.Buffer ()) 1812 { 1813 1814 uint32 len = Length (); 1815 1816 char *dPtr = fData.Buffer_char (); 1817 1818 for (uint32 j = 0; j < len; j++) 1819 { 1820 1821 char c = dPtr [j]; 1822 1823 if (c >= 'A' && c <= 'Z') 1824 { 1825 1826 dPtr [j] = c - 'A' + 'a'; 1827 1828 } 1829 1830 } 1831 1832 } 1833 1834 } 1835 1836/*****************************************************************************/ 1837 1838void dng_string::SetLineEndings (char ending) 1839 { 1840 1841 if (fData.Buffer ()) 1842 { 1843 1844 const char *sPtr = fData.Buffer_char (); 1845 char *dPtr = fData.Buffer_char (); 1846 1847 while (*sPtr) 1848 { 1849 1850 char c = *(sPtr++); 1851 1852 char nc = sPtr [0]; 1853 1854 if ((c == '\r' && nc == '\n') || 1855 (c == '\n' && nc == '\r')) 1856 { 1857 1858 sPtr++; 1859 1860 if (ending) 1861 { 1862 *(dPtr++) = ending; 1863 } 1864 1865 } 1866 1867 else if (c == '\n' || 1868 c == '\r') 1869 { 1870 1871 if (ending) 1872 { 1873 *(dPtr++) = ending; 1874 } 1875 1876 } 1877 1878 else 1879 { 1880 1881 *(dPtr++) = c; 1882 1883 } 1884 1885 } 1886 1887 *dPtr = 0; 1888 1889 } 1890 1891 } 1892 1893/*****************************************************************************/ 1894 1895void dng_string::StripLowASCII () 1896 { 1897 1898 if (fData.Buffer ()) 1899 { 1900 1901 const char *sPtr = fData.Buffer_char (); 1902 char *dPtr = fData.Buffer_char (); 1903 1904 while (*sPtr) 1905 { 1906 1907 char c = *(sPtr++); 1908 1909 if (c == '\r' || c == '\n' || (uint8) c >= ' ') 1910 { 1911 1912 *(dPtr++) = c; 1913 1914 } 1915 1916 } 1917 1918 *dPtr = 0; 1919 1920 } 1921 1922 } 1923 1924/*****************************************************************************/ 1925 1926void dng_string::NormalizeAsCommaSeparatedNumbers () 1927 { 1928 1929 if (fData.Buffer ()) 1930 { 1931 1932 const char *sPtr = fData.Buffer_char (); 1933 char *dPtr = fData.Buffer_char (); 1934 1935 bool commaInserted = false; 1936 1937 while (*sPtr) 1938 { 1939 1940 uint32 c = DecodeUTF8 (sPtr); 1941 1942 // Support number formats such as "3", "+3.0", "-3.1416", "314.16e-2", 1943 // "0.31416E1", but no hex/octal number representations. 1944 1945 if (isdigit ((int) c) || c == '.' || c == '-' || c == '+' || c == 'e' || c == 'E') 1946 { 1947 1948 *(dPtr++) = (char) c; 1949 1950 if (commaInserted) 1951 { 1952 1953 commaInserted = false; 1954 1955 } 1956 1957 } 1958 1959 else if (!commaInserted) 1960 { 1961 1962 *(dPtr++) = ','; 1963 1964 commaInserted = true; 1965 1966 } 1967 1968 } 1969 1970 *dPtr = 0; 1971 1972 } 1973 1974 } 1975 1976/******************************************************************************/ 1977 1978// Unicode to low-ASCII strings table. 1979 1980struct UnicodeToLowASCIIEntry 1981 { 1982 uint32 unicode; 1983 const char *ascii; 1984 }; 1985 1986static const UnicodeToLowASCIIEntry kUnicodeToLowASCII [] = 1987 { 1988 { 0x00A0, " " }, 1989 { 0x00A1, "!" }, 1990 { 0x00A9, "(C)" }, 1991 { 0x00AA, "a" }, 1992 { 0x00AB, "<<" }, 1993 { 0x00AC, "!" }, 1994 { 0x00AE, "(R)" }, 1995 { 0x00B0, "dg" }, 1996 { 0x00B1, "+-" }, 1997 { 0x00B7, "." }, 1998 { 0x00BA, "o" }, 1999 { 0x00BB, ">>" }, 2000 { 0x00BF, "?" }, 2001 { 0x00C0, "A" }, 2002 { 0x00C1, "A" }, 2003 { 0x00C2, "A" }, 2004 { 0x00C3, "A" }, 2005 { 0x00C4, "A" }, 2006 { 0x00C5, "A" }, 2007 { 0x00C6, "AE" }, 2008 { 0x00C7, "C" }, 2009 { 0x00C8, "E" }, 2010 { 0x00C9, "E" }, 2011 { 0x00CA, "E" }, 2012 { 0x00CB, "E" }, 2013 { 0x00CC, "I" }, 2014 { 0x00CD, "I" }, 2015 { 0x00CE, "I" }, 2016 { 0x00CF, "I" }, 2017 { 0x00D1, "N" }, 2018 { 0x00D2, "O" }, 2019 { 0x00D3, "O" }, 2020 { 0x00D4, "O" }, 2021 { 0x00D5, "O" }, 2022 { 0x00D6, "O" }, 2023 { 0x00D8, "O" }, 2024 { 0x00D9, "U" }, 2025 { 0x00DA, "U" }, 2026 { 0x00DB, "U" }, 2027 { 0x00DC, "U" }, 2028 { 0x00DD, "Y" }, 2029 { 0x00E0, "a" }, 2030 { 0x00E1, "a" }, 2031 { 0x00E2, "a" }, 2032 { 0x00E3, "a" }, 2033 { 0x00E4, "a" }, 2034 { 0x00E5, "a" }, 2035 { 0x00E6, "ae" }, 2036 { 0x00E7, "c" }, 2037 { 0x00E8, "e" }, 2038 { 0x00E9, "e" }, 2039 { 0x00EA, "e" }, 2040 { 0x00EB, "e" }, 2041 { 0x00EC, "i" }, 2042 { 0x00ED, "i" }, 2043 { 0x00EE, "i" }, 2044 { 0x00EF, "i" }, 2045 { 0x00F1, "n" }, 2046 { 0x00F2, "o" }, 2047 { 0x00F3, "o" }, 2048 { 0x00F4, "o" }, 2049 { 0x00F5, "o" }, 2050 { 0x00F6, "o" }, 2051 { 0x00F7, "/" }, 2052 { 0x00F8, "o" }, 2053 { 0x00F9, "u" }, 2054 { 0x00FA, "u" }, 2055 { 0x00FB, "u" }, 2056 { 0x00FC, "u" }, 2057 { 0x00FD, "y" }, 2058 { 0x00FF, "y" }, 2059 { 0x0131, "i" }, 2060 { 0x0152, "OE" }, 2061 { 0x0153, "oe" }, 2062 { 0x0178, "Y" }, 2063 { 0x2013, "-" }, 2064 { 0x2014, "-" }, 2065 { 0x2018, "'" }, 2066 { 0x2019, "'" }, 2067 { 0x201A, "," }, 2068 { 0x201C, "\"" }, 2069 { 0x201D, "\"" }, 2070 { 0x201E, ",," }, 2071 { 0x2022, "." }, 2072 { 0x2026, "..." }, 2073 { 0x2039, "<" }, 2074 { 0x203A, ">" }, 2075 { 0x2044, "/" }, 2076 { 0x2122, "TM" }, 2077 { 0x2206, "d" }, 2078 { 0x2211, "S" }, 2079 { 0x2260, "!=" }, 2080 { 0x2264, "<=" }, 2081 { 0x2265, ">=" }, 2082 { 0x2318, "#" }, 2083 { 0xFB01, "fi" }, 2084 { 0xFB02, "fl" } 2085 }; 2086 2087/******************************************************************************/ 2088 2089void dng_string::ForceASCII () 2090 { 2091 2092 if (!IsASCII ()) 2093 { 2094 2095 uint32 tempBufferSize = 2096 SafeUint32Add (SafeUint32Mult(Length(), 3), 1); 2097 dng_memory_data tempBuffer (tempBufferSize); 2098 2099 char *dPtr = tempBuffer.Buffer_char (); 2100 char * const destEnd = dPtr + tempBufferSize; 2101 2102 const char *sPtr = Get (); 2103 2104 while (*sPtr) 2105 { 2106 2107 uint32 x = DecodeUTF8 (sPtr); 2108 2109 if (x <= 0x007F) 2110 { 2111 2112 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2113 *(dPtr++) = (char) x; 2114 2115 } 2116 2117 else 2118 { 2119 2120 const char *ascii = NULL; 2121 2122 const uint32 kTableEntrys = sizeof (kUnicodeToLowASCII ) / 2123 sizeof (kUnicodeToLowASCII [0]); 2124 2125 for (uint32 entry = 0; entry < kTableEntrys; entry++) 2126 { 2127 2128 if (kUnicodeToLowASCII [entry] . unicode == x) 2129 { 2130 2131 ascii = kUnicodeToLowASCII [entry] . ascii; 2132 2133 break; 2134 2135 } 2136 2137 } 2138 2139 if (ascii) 2140 { 2141 2142 while (*ascii) 2143 { 2144 2145 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2146 *(dPtr++) = *(ascii++); 2147 2148 } 2149 2150 } 2151 2152 else 2153 { 2154 2155 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2156 *(dPtr++) ='?'; 2157 2158 } 2159 2160 } 2161 2162 } 2163 2164 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2165 *dPtr = 0; 2166 2167 Set (tempBuffer.Buffer_char ()); 2168 2169 } 2170 2171 } 2172 2173/******************************************************************************/ 2174 2175static dng_mutex gProtectUCCalls ("gProtectUCCalls"); 2176 2177/******************************************************************************/ 2178 2179int32 dng_string::Compare (const dng_string &s) const 2180 { 2181 2182 #if qMacOS 2183 #if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 2184 2185 ThrowProgramError ("Compare() not implemented on iOS"); 2186 return 0; 2187 2188 #else 2189 2190 { 2191 2192 dng_memory_data aStrA; 2193 dng_memory_data aStrB; 2194 2195 uint32 aLenA = this->Get_UTF16 (aStrA); 2196 uint32 aLenB = s .Get_UTF16 (aStrB); 2197 2198 if (aLenA > 0) 2199 { 2200 2201 if (aLenB > 0) 2202 { 2203 2204 // For some Mac OS versions anyway, UCCompareTextDefault is not 2205 // thread safe. 2206 2207 dng_lock_mutex lockMutex (&gProtectUCCalls); 2208 2209 UCCollateOptions aOptions = kUCCollateStandardOptions | 2210 kUCCollatePunctuationSignificantMask; 2211 2212 SInt32 aOrder = -1; 2213 2214 Boolean aEqual = false; 2215 2216 OSStatus searchStatus = ::UCCompareTextDefault (aOptions, 2217 aStrA.Buffer_uint16 (), 2218 aLenA, 2219 aStrB.Buffer_uint16 (), 2220 aLenB, 2221 &aEqual, 2222 &aOrder); 2223 2224 if (searchStatus == noErr) 2225 { 2226 2227 if (aEqual || (aOrder == 0)) 2228 { 2229 return 0; 2230 } 2231 2232 else 2233 { 2234 return (aOrder > 0) ? 1 : -1; 2235 } 2236 2237 } 2238 2239 else 2240 { 2241 2242 DNG_REPORT ("UCCompareTextDefault failed"); 2243 2244 return -1; 2245 2246 } 2247 2248 } 2249 2250 else 2251 { 2252 return 1; 2253 } 2254 2255 } 2256 2257 else 2258 { 2259 2260 if (aLenB > 0) 2261 { 2262 return -1; 2263 } 2264 2265 else 2266 { 2267 return 0; 2268 } 2269 2270 } 2271 2272 } 2273 2274 #endif // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 2275 #elif qWinOS 2276 2277 { 2278 2279 dng_memory_data aStrA; 2280 dng_memory_data aStrB; 2281 2282 uint32 aLenA = this->Get_UTF16 (aStrA); 2283 uint32 aLenB = s .Get_UTF16 (aStrB); 2284 2285 if (aLenA > 0) 2286 { 2287 2288 if (aLenB > 0) 2289 { 2290 2291 LCID locale = LOCALE_SYSTEM_DEFAULT; 2292 2293 DWORD aFlags = NORM_IGNOREWIDTH; 2294 2295 int aOrder = ::CompareStringW (locale, 2296 aFlags, 2297 (const WCHAR *) aStrA.Buffer_uint16 (), 2298 aLenA, 2299 (const WCHAR *) aStrB.Buffer_uint16 (), 2300 aLenB); 2301 2302 if (aOrder == CSTR_EQUAL) 2303 { 2304 return 0; 2305 } 2306 2307 else if (aOrder == CSTR_GREATER_THAN) 2308 { 2309 return 1; 2310 } 2311 2312 else 2313 { 2314 return -1; 2315 } 2316 2317 } 2318 2319 else 2320 { 2321 return 1; 2322 } 2323 2324 } 2325 2326 else 2327 { 2328 2329 if (aLenB > 0) 2330 { 2331 return -1; 2332 } 2333 else 2334 { 2335 return 0; 2336 } 2337 2338 } 2339 2340 } 2341 2342 #else 2343 2344 // Fallback to a pure Unicode sort order. 2345 2346 { 2347 2348 for (uint32 pass = 0; pass < 2; pass++) 2349 { 2350 2351 const char *aPtr = Get (); 2352 const char *bPtr = s.Get (); 2353 2354 while (*aPtr || *bPtr) 2355 { 2356 2357 if (!bPtr) 2358 { 2359 return 1; 2360 } 2361 2362 else if (!aPtr) 2363 { 2364 return -1; 2365 } 2366 2367 uint32 a = DecodeUTF8 (aPtr); 2368 uint32 b = DecodeUTF8 (bPtr); 2369 2370 // Ignore case on first compare pass. 2371 2372 if (pass == 0) 2373 { 2374 2375 if (a >= (uint32) 'a' && a <= (uint32) 'z') 2376 { 2377 a = a - (uint32) 'a' + (uint32) 'A'; 2378 } 2379 2380 if (b >= (uint32) 'a' && b <= (uint32) 'z') 2381 { 2382 b = b - (uint32) 'a' + (uint32) 'A'; 2383 } 2384 2385 } 2386 2387 if (b > a) 2388 { 2389 return 1; 2390 } 2391 2392 else if (a < b) 2393 { 2394 return -1; 2395 } 2396 2397 } 2398 2399 } 2400 2401 } 2402 2403 #endif 2404 2405 return 0; 2406 2407 } 2408 2409/*****************************************************************************/ 2410