1/*****************************************************************************/ 2// Copyright 2006-2007 Adobe Systems Incorporated 3// All Rights Reserved. 4// 5// NOTICE: Adobe permits you to use, modify, and distribute this file in 6// accordance with the terms of the Adobe license agreement accompanying it. 7/*****************************************************************************/ 8 9/* $Id: //mondo/dng_sdk_1_4/dng_sdk/source/dng_string.cpp#2 $ */ 10/* $DateTime: 2012/07/31 22:04:34 $ */ 11/* $Change: 840853 $ */ 12/* $Author: tknoll $ */ 13 14/*****************************************************************************/ 15 16#include "dng_string.h" 17 18#include "dng_assertions.h" 19#include "dng_exceptions.h" 20#include "dng_flags.h" 21#include "dng_mutex.h" 22#include "dng_utils.h" 23#include "dng_safe_arithmetic.h" 24 25#if qMacOS 26#include <TargetConditionals.h> 27#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 28#include <MobileCoreServices/MobileCoreServices.h> 29#else 30#include <CoreServices/CoreServices.h> 31#endif // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 32#endif // qMacOS 33 34#if qWinOS 35#include <windows.h> 36#endif 37 38#if qiPhone || qAndroid || qLinux 39#include <ctype.h> // for isdigit 40#endif 41 42/*****************************************************************************/ 43 44const uint32 kREPLACEMENT_CHARACTER = 0x0000FFFD; 45 46/*****************************************************************************/ 47 48// Returns the length of the zero-terminated string 's'. Throws a dng_exception 49// if the length of 's' is too large to be represented as a uint32_t. 50static uint32 strlenAsUint32(const char *s) 51 { 52 53 uint32 lengthAsUint32 = 0; 54 ConvertUnsigned(strlen(s), &lengthAsUint32); 55 56 return lengthAsUint32; 57 58 } 59 60// Checks whether there is enough space left in the buffer pointed to by 61// 'currentPos' to write at least 'space' elements of type T (to positions 62// currentPos[0] through currentPos[space - 1]. Throws a dng_exception if there 63// is not enough space left in the buffer. 64// 'bufferEnd' should point one element beyond the end of the buffer. For 65// example, if the buffer is "T buffer[3];", then bufferEnd should point to 66// T + 3. 67template <class T> 68static void CheckSpaceLeftInBuffer(const T *currentPos, 69 const T *bufferEnd, 70 size_t space) 71 { 72 73 if (bufferEnd < currentPos || static_cast<size_t>(bufferEnd - currentPos) < space) 74 { 75 ThrowMemoryFull ("Buffer overrun"); 76 } 77 78 } 79 80/*****************************************************************************/ 81 82// Throws an exception to notify the user of code that has not been security 83// hardened and prevent execution of that code. 84// 85// Though the DNG SDK in general has been security-hardened, this does not apply 86// to the following Mac-OS- and Windows-specific functions. Calls to 87// ThrowNotHardened() have been added to these functions to alert callers of 88// this fact. 89// 90// If you're trying to use a function that calls ThrowNotHardened(), you need to 91// fix the security issues noted in the comment next to the ThrowNotHardened() 92// call. Once you have fixed these issues, obtain a security review for the 93// fixes. This may require fuzzing of the modified code on the target platform. 94static void ThrowNotHardened() 95 { 96 ThrowProgramError ("This function has not been security-hardened"); 97 } 98 99#if qMacOS 100#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 101 102static uint32 Extract_SystemEncoding (const dng_string &dngString, 103 dng_memory_data &buffer) 104 { 105 // TODO: Needs implementation. 106 ThrowProgramError ("Extract_SystemEncoding() not implemented on iOS"); 107 return 0; 108 } 109 110static void Assign_SystemEncoding (dng_string &dngString, 111 const char *otherString) 112 { 113 // TODO: Needs implementation. 114 ThrowProgramError ("Assign_SystemEncoding() not implemented on iOS"); 115 116 } 117 118static void Assign_JIS_X208_1990 (dng_string &dngString, 119 const char *otherString) 120 { 121 // TODO: Needs implementation. 122 ThrowProgramError ("Assign_JIS_X208_1990() not implemented on iOS"); 123 } 124 125#else 126 127static void Assign_Multibyte (dng_string &dngString, 128 const char *otherString, 129 TextEncoding encoding) 130 { 131 132 // This function contains security-vulnerable code. Do not use. 133 // The particular vulnerabilities are: 134 // - Casting the result of strlen() to a uint32 may case truncation. (Use 135 // strlenAsUint32() instead.) 136 // - The computation of aBufSize and the subsequent addition of 1 in the 137 // call to the dng_memory_data constructor may wrap around. 138 ThrowNotHardened(); 139 140 uint32 aSize = (uint32) strlen (otherString); 141 142 if (aSize > 0) 143 { 144 145 uint32 aBufSize = aSize * 6 + 256; 146 147 dng_memory_data aBuf (aBufSize + 1); 148 149 UnicodeMapping aMapping; 150 151 aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0, 152 kUnicodeNoSubset, 153 kUnicodeUTF8Format); 154 155 aMapping.otherEncoding = encoding; 156 aMapping.mappingVersion = kUnicodeUseLatestMapping; 157 158 TextToUnicodeInfo aInfo = NULL; 159 160 if (::CreateTextToUnicodeInfo (&aMapping, &aInfo) == noErr) 161 { 162 163 ByteCount aInput = 0; 164 ByteCount aOutput = 0; 165 166 ::ConvertFromTextToUnicode (aInfo, 167 aSize, 168 otherString, 169 kUnicodeUseFallbacksMask | 170 kUnicodeLooseMappingsMask, 171 0, 172 NULL, 173 NULL, 174 NULL, 175 aBufSize, 176 &aInput, 177 &aOutput, 178 (UniChar *) aBuf.Buffer ()); 179 180 ::DisposeTextToUnicodeInfo (&aInfo); 181 182 if (aOutput > 0 && aOutput <= aBufSize) 183 { 184 185 char *aBufChar = aBuf.Buffer_char (); 186 187 aBufChar [aOutput] = 0; 188 189 dngString.Set (aBufChar); 190 191 return; 192 193 } 194 195 } 196 197 } 198 199 dngString.Clear (); 200 201 } 202 203static uint32 Extract_Multibyte (const dng_string &dngString, 204 dng_memory_data &buffer, 205 TextEncoding encoding) 206 { 207 208 // This function contains security-vulnerable code. Do not use. 209 // The particular vulnerabilities are: 210 // - The computation of aBufSize may wrap around. 211 // - The computation of the argument to buffer.Allocate() may overflow; the 212 // conversion to uint32 is also problematic. 213 // - The signed-to-unsigned conversion in the return statement " 214 // return (uint32) aOutput;" may be problematic. 215 ThrowNotHardened(); 216 217 uint32 aSize = dngString.Length (); 218 219 if (aSize > 0) 220 { 221 222 uint32 aBufSize = (aSize * 2) + 256; 223 224 dng_memory_data tempBuffer (aBufSize); 225 226 UnicodeMapping aMapping; 227 228 aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0, 229 kUnicodeNoSubset, 230 kUnicodeUTF8Format); 231 232 aMapping.otherEncoding = encoding; 233 aMapping.mappingVersion = kUnicodeUseLatestMapping; 234 235 UnicodeToTextInfo aInfo = NULL; 236 237 if (::CreateUnicodeToTextInfo (&aMapping, &aInfo) == noErr) 238 { 239 240 ByteCount aInput = 0; 241 ByteCount aOutput = 0; 242 243 ::ConvertFromUnicodeToText (aInfo, 244 aSize, 245 (const UniChar *) dngString.Get (), 246 kUnicodeUseFallbacksMask | 247 kUnicodeLooseMappingsMask | 248 kUnicodeDefaultDirectionMask, 249 0, 250 NULL, 251 NULL, 252 NULL, 253 aBufSize, 254 &aInput, 255 &aOutput, 256 tempBuffer.Buffer_char ()); 257 258 ::DisposeUnicodeToTextInfo (&aInfo); 259 260 if (aOutput > 0) 261 { 262 263 buffer.Allocate ((uint32) (aOutput + 1)); 264 265 memcpy (buffer.Buffer (), 266 tempBuffer.Buffer (), 267 aOutput); 268 269 buffer.Buffer_char () [aOutput] = 0; 270 271 return (uint32) aOutput; 272 273 } 274 275 } 276 277 } 278 279 buffer.Allocate (1); 280 281 buffer.Buffer_char () [0] = 0; 282 283 return 0; 284 285 } 286 287static void Assign_SystemEncoding (dng_string &dngString, 288 const char *otherString) 289 { 290 291 TextEncoding aEncoding; 292 293 ::UpgradeScriptInfoToTextEncoding (smSystemScript, 294 kTextLanguageDontCare, 295 kTextRegionDontCare, 296 NULL, 297 &aEncoding); 298 299 Assign_Multibyte (dngString, 300 otherString, 301 aEncoding); 302 303 } 304 305static uint32 Extract_SystemEncoding (const dng_string &dngString, 306 dng_memory_data &buffer) 307 { 308 309 TextEncoding aEncoding; 310 311 ::UpgradeScriptInfoToTextEncoding (smSystemScript, 312 kTextLanguageDontCare, 313 kTextRegionDontCare, 314 NULL, 315 &aEncoding); 316 317 return Extract_Multibyte (dngString, 318 buffer, 319 aEncoding); 320 321 } 322 323static void Assign_JIS_X208_1990 (dng_string &dngString, 324 const char *otherString) 325 { 326 327 Assign_Multibyte (dngString, 328 otherString, 329 kTextEncodingJIS_X0208_90); 330 331 } 332 333#endif // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 334#endif // qMacOS 335 336/*****************************************************************************/ 337 338#if qWinOS 339 340static void Assign_Multibyte (dng_string &dngString, 341 const char *otherString, 342 UINT encoding) 343 { 344 345 // This function contains security-vulnerable code. Do not use. 346 // The particular vulnerabilities are: 347 // - Converting the return value of strlen() to int may cause overflow. 348 // - The computation of aBufChars and of the argument to the dng_memory_data 349 // constructor may overflow. Additionally, there is an implicit 350 // signed-to-unsigned conversion in the call to the dng_memory_data 351 // constructor. 352 ThrowNotHardened(); 353 354 DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes"); 355 356 int aSize = (int) strlen (otherString); 357 358 if (aSize > 0) 359 { 360 361 int aBufChars = aSize * 3 + 128; 362 363 dng_memory_data aBuf ((aBufChars + 1) << 1); 364 365 int aResult = ::MultiByteToWideChar (encoding, 366 0, 367 otherString, 368 aSize, 369 (WCHAR *) aBuf.Buffer (), 370 aBufChars); 371 372 if (aResult > 0 && aResult <= aBufChars) 373 { 374 375 uint16 * aUTF16 = aBuf.Buffer_uint16 (); 376 377 aUTF16 [aResult] = 0; 378 379 dngString.Set_UTF16 (aUTF16); 380 381 return; 382 383 } 384 385 } 386 387 dngString.Clear (); 388 389 } 390 391static uint32 Extract_Multibyte (const dng_string &dngString, 392 dng_memory_data &buffer, 393 UINT encoding) 394 { 395 396 // This function contains security-vulnerable code. Do not use. 397 // The particular vulnerabilities are: 398 // - Converting the return value of dngString.Get_UTF16() may cause 399 // overflow. 400 // - The computation of dBufSize may overflow. 401 // - The calls to the dng_memory_data constructor and to buffer.Allocate() 402 // trigger implicit conversions of int to uint32 that may be problematic. 403 // - The memcpy() call triggers an implicit conversion of aResult to a 404 // size_t, which may be problematic. 405 // - The conversion of aResult to a uint32 in the return statement may be 406 // problematic. 407 ThrowNotHardened(); 408 409 DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes"); 410 411 dng_memory_data sBuffer; 412 413 int aCount = dngString.Get_UTF16 (sBuffer); 414 415 int dBufSize = aCount * 2 + 256; 416 417 dng_memory_data dBuffer (dBufSize); 418 419 int aResult = ::WideCharToMultiByte (encoding, 420 0, 421 (WCHAR *) sBuffer.Buffer (), 422 aCount, 423 dBuffer.Buffer_char (), 424 dBufSize, 425 NULL, 426 NULL); 427 428 if (aResult < 0) 429 aResult = 0; 430 431 buffer.Allocate (aResult + 1); 432 433 memcpy (buffer.Buffer (), 434 dBuffer.Buffer (), 435 aResult); 436 437 buffer.Buffer_char () [aResult] = 0; 438 439 return (uint32) aResult; 440 441 } 442 443static void Assign_SystemEncoding (dng_string &dngString, 444 const char *otherString) 445 { 446 447 Assign_Multibyte (dngString, 448 otherString, 449 ::GetACP ()); 450 451 } 452 453static uint32 Extract_SystemEncoding (const dng_string &dngString, 454 dng_memory_data &buffer) 455 { 456 457 return Extract_Multibyte (dngString, 458 buffer, 459 ::GetACP ()); 460 461 } 462 463static void Assign_JIS_X208_1990 (dng_string &dngString, 464 const char *otherString) 465 { 466 467 // From MSDN documentation: 20932 = JIS X 0208-1990 & 0121-1990 468 469 const UINT kJIS = 20932; 470 471 Assign_Multibyte (dngString, 472 otherString, 473 kJIS); 474 475 } 476 477#endif 478 479/*****************************************************************************/ 480 481static bool IsASCII (const char *s) 482 { 483 484 if (!s) 485 { 486 487 return true; 488 489 } 490 491 while (true) 492 { 493 494 uint8 c = (uint8) *(s++); 495 496 if (c == 0) 497 { 498 499 break; 500 501 } 502 503 if (c & 0x80) 504 { 505 506 return false; 507 508 } 509 510 } 511 512 return true; 513 514 } 515 516/*****************************************************************************/ 517 518dng_string::dng_string () 519 520 : fData () 521 522 { 523 524 } 525 526/*****************************************************************************/ 527 528dng_string::dng_string (const dng_string &s) 529 530 : fData () 531 532 { 533 534 Set (s.Get ()); 535 536 } 537 538/*****************************************************************************/ 539 540dng_string & dng_string::operator= (const dng_string &s) 541 { 542 543 if (this != &s) 544 { 545 546 Set (s.Get ()); 547 548 } 549 550 return *this; 551 552 } 553 554/*****************************************************************************/ 555 556dng_string::~dng_string () 557 { 558 559 } 560 561/*****************************************************************************/ 562 563const char * dng_string::Get () const 564 { 565 566 if (fData.Buffer ()) 567 { 568 569 return fData.Buffer_char (); 570 571 } 572 573 return ""; 574 575 } 576 577/*****************************************************************************/ 578 579bool dng_string::IsASCII () const 580 { 581 582 return ::IsASCII (Get ()); 583 584 } 585 586/*****************************************************************************/ 587 588void dng_string::Set (const char *s) 589 { 590 591 // Measure the new length. 592 593 uint32 newLen = (s != NULL ? strlenAsUint32 (s) : 0); 594 595 // If it is a NULL string, then clear the buffer. 596 597 if (newLen == 0) 598 { 599 600 fData.Clear (); 601 602 } 603 604 // Else we need to copy the bytes. 605 606 else 607 { 608 609 uint32 oldLen = Length (); 610 611 // We might be setting this string to a sub-string of itself, 612 // so don't reallocate the data unless the string is getting 613 // longer. 614 615 if (newLen > oldLen) 616 { 617 618 fData.Clear (); 619 620 fData.Allocate (SafeUint32Add (newLen, 1)); 621 622 } 623 624 char *d = fData.Buffer_char (); 625 626 for (uint32 k = 0; k <= newLen; k++) 627 { 628 629 d [k] = s [k]; 630 631 } 632 633 } 634 635 } 636 637/*****************************************************************************/ 638 639void dng_string::Set_ASCII (const char *s) 640 { 641 642 if (::IsASCII (s)) 643 { 644 645 Set (s); 646 647 } 648 649 else 650 { 651 652 Set_SystemEncoding (s); 653 654 } 655 656 } 657 658/*****************************************************************************/ 659 660void dng_string::Set_UTF8 (const char *s) 661 { 662 663 uint32 len = strlenAsUint32 (s); 664 665 const char *sEnd = s + len; 666 667 // Worst case expansion is 1-byte characters expanding to 668 // replacement character, which requires 3 bytes. 669 670 const uint32 destBufferLength = SafeUint32Add (SafeUint32Mult (len, 3), 1); 671 dng_memory_data buffer (destBufferLength); 672 673 uint8 *d = buffer.Buffer_uint8 (); 674 uint8 * const destEnd = d + destBufferLength; 675 676 while (s < sEnd) 677 { 678 679 uint32 aChar = DecodeUTF8 (s, (uint32) (sEnd - s)); 680 681 if (aChar > 0x7FFFFFFF) 682 { 683 aChar = kREPLACEMENT_CHARACTER; 684 } 685 686 #if qDNGValidate 687 688 if (aChar == kREPLACEMENT_CHARACTER) 689 { 690 ReportWarning ("Expected UTF-8 value is not valid UTF-8 (or contains a kREPLACEMENT_CHARACTER)"); 691 } 692 693 #endif 694 695 if (aChar < 0x00000080) 696 { 697 CheckSpaceLeftInBuffer (d, destEnd, 1); 698 *(d++) = (uint8) aChar; 699 } 700 701 else if (aChar < 0x00000800) 702 { 703 CheckSpaceLeftInBuffer (d, destEnd, 2); 704 *(d++) = (uint8) ((aChar >> 6) | 0x000000C0); 705 *(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080); 706 } 707 708 else if (aChar < 0x00010000) 709 { 710 CheckSpaceLeftInBuffer (d, destEnd, 3); 711 *(d++) = (uint8) ( (aChar >> 12) | 0x000000E0); 712 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 713 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 714 } 715 716 else if (aChar < 0x00200000) 717 { 718 CheckSpaceLeftInBuffer (d, destEnd, 4); 719 *(d++) = (uint8) ( (aChar >> 18) | 0x000000F0); 720 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 721 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 722 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 723 } 724 725 else if (aChar < 0x04000000) 726 { 727 CheckSpaceLeftInBuffer (d, destEnd, 5); 728 *(d++) = (uint8) ( (aChar >> 24) | 0x000000F8); 729 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 730 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 731 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 732 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 733 } 734 735 else 736 { 737 CheckSpaceLeftInBuffer (d, destEnd, 6); 738 *(d++) = (uint8) ( (aChar >> 30) | 0x000000FC); 739 *(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080); 740 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 741 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 742 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 743 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 744 } 745 746 } 747 748 CheckSpaceLeftInBuffer (d, destEnd, 1); 749 *d = 0; 750 751 Set (buffer.Buffer_char ()); 752 753 } 754 755/*****************************************************************************/ 756 757uint32 dng_string::Get_SystemEncoding (dng_memory_data &buffer) const 758 { 759 760 if (IsASCII ()) 761 { 762 763 uint32 len = Length (); 764 765 const uint32 destBufferLength = SafeUint32Add (len, 1); 766 buffer.Allocate (destBufferLength); 767 768 memcpy (buffer.Buffer (), Get (), destBufferLength); 769 770 return len; 771 772 } 773 774 else 775 { 776 777 #if qMacOS || qWinOS 778 779 return Extract_SystemEncoding (*this, buffer); 780 781 #else 782 783 // Fallback logic to force the string to ASCII. 784 785 dng_string temp (*this); 786 787 temp.ForceASCII (); 788 789 return temp.Get_SystemEncoding (buffer); 790 791 #endif 792 793 } 794 795 } 796 797/*****************************************************************************/ 798 799void dng_string::Set_SystemEncoding (const char *s) 800 { 801 802 if (::IsASCII (s)) 803 { 804 805 Set (s); 806 807 } 808 809 else 810 { 811 812 #if qMacOS || qWinOS 813 814 Assign_SystemEncoding (*this, s); 815 816 #else 817 818 // Fallback logic that just grabs the ASCII characters and 819 // ignores the non-ASCII characters. 820 821 uint32 len = strlenAsUint32 (s); 822 823 const uint32 destBufferLength = SafeUint32Add (len, 1); 824 dng_memory_data buffer (destBufferLength); 825 826 uint8 *d = buffer.Buffer_uint8 (); 827 uint8 * const destEnd = d + destBufferLength; 828 829 while (*s) 830 { 831 832 uint8 c = (uint8) *(s++); 833 834 if ((c & 0x80) == 0) 835 { 836 837 CheckSpaceLeftInBuffer (d, destEnd, 1); 838 *(d++) = c; 839 840 } 841 842 } 843 844 CheckSpaceLeftInBuffer (d, destEnd, 1); 845 *d = 0; 846 847 Set (buffer.Buffer_char ()); 848 849 #endif 850 851 } 852 853 } 854 855/*****************************************************************************/ 856 857bool dng_string::ValidSystemEncoding () const 858 { 859 860 if (IsASCII ()) 861 { 862 863 return true; 864 865 } 866 867 dng_memory_data buffer; 868 869 Get_SystemEncoding (buffer); 870 871 dng_string temp; 872 873 temp.Set_SystemEncoding (buffer.Buffer_char ()); 874 875 return (*this == temp); 876 877 } 878 879/*****************************************************************************/ 880 881void dng_string::Set_JIS_X208_1990 (const char *s) 882 { 883 884 if (::IsASCII (s)) 885 { 886 887 Set (s); 888 889 } 890 891 else 892 { 893 894 #if qMacOS || qWinOS 895 896 Assign_JIS_X208_1990 (*this, s); 897 898 #else 899 900 // Fallback to the ASCII extraction logic. 901 902 Set_SystemEncoding (s); 903 904 #endif 905 906 } 907 908 } 909 910/*****************************************************************************/ 911 912uint32 dng_string::DecodeUTF8 (const char *&s, 913 uint32 maxBytes, 914 bool *isValid) 915 { 916 917 static const uint8 gUTF8Bytes [256] = 918 { 919 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 920 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 921 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 922 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 923 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 924 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 925 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 926 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0 927 }; 928 929 if (isValid) 930 { 931 *isValid = true; 932 } 933 934 const uint8 *nBuf = (const uint8 *) s; 935 936 uint32 aChar = nBuf [0]; 937 938 uint32 aSize = gUTF8Bytes [aChar]; 939 940 if (aSize > maxBytes) 941 { 942 943 s += maxBytes; 944 945 if (isValid) 946 { 947 *isValid = false; 948 } 949 950 return kREPLACEMENT_CHARACTER; 951 952 } 953 954 s += aSize; 955 956 for (uint32 extra = 1; extra < aSize; extra++) 957 { 958 959 if ((nBuf [extra] & 0xC0) != 0x80) 960 { 961 962 if (isValid) 963 { 964 *isValid = false; 965 } 966 967 return kREPLACEMENT_CHARACTER; 968 969 } 970 971 } 972 973 switch (aSize) 974 { 975 976 case 0: 977 { 978 979 s++; // Don't get stuck in infinite loop 980 981 if (isValid) 982 { 983 *isValid = false; 984 } 985 986 return kREPLACEMENT_CHARACTER; 987 988 } 989 990 case 1: 991 { 992 993 return aChar; 994 995 } 996 997 case 2: 998 { 999 1000 aChar = ((aChar << 6) + nBuf [1]) - (uint32) 0x00003080UL; 1001 1002 break; 1003 1004 } 1005 1006 case 3: 1007 { 1008 1009 aChar = ((((aChar << 6) + nBuf [1]) 1010 << 6) + nBuf [2]) - (uint32) 0x000E2080UL; 1011 1012 break; 1013 1014 } 1015 1016 case 4: 1017 { 1018 1019 aChar = ((((((aChar << 6) + nBuf [1]) 1020 << 6) + nBuf [2]) 1021 << 6) + nBuf [3]) - (uint32) 0x03C82080UL; 1022 1023 break; 1024 1025 } 1026 } 1027 1028 if (aChar < 0x7F || aChar > 0x0010FFFF) 1029 { 1030 1031 if (isValid) 1032 { 1033 *isValid = false; 1034 } 1035 1036 return kREPLACEMENT_CHARACTER; 1037 1038 } 1039 1040 return aChar; 1041 1042 } 1043 1044/*****************************************************************************/ 1045 1046bool dng_string::IsUTF8 (const char *s) 1047 { 1048 1049 uint32 len = strlenAsUint32 (s); 1050 1051 const char *sEnd = s + len; 1052 1053 while (s < sEnd) 1054 { 1055 1056 bool isValid = true; 1057 1058 (void) DecodeUTF8 (s, (uint32) (sEnd - s), &isValid); 1059 1060 if (!isValid) 1061 { 1062 return false; 1063 } 1064 1065 } 1066 1067 return true; 1068 1069 } 1070 1071/*****************************************************************************/ 1072 1073void dng_string::Set_UTF8_or_System (const char *s) 1074 { 1075 1076 if (::IsASCII (s)) 1077 { 1078 1079 Set (s); 1080 1081 } 1082 1083 else if (IsUTF8 (s)) 1084 { 1085 1086 Set_UTF8 (s); 1087 1088 } 1089 1090 else 1091 { 1092 1093 Set_SystemEncoding (s); 1094 1095 } 1096 1097 } 1098 1099/*****************************************************************************/ 1100 1101uint32 dng_string::Get_UTF16 (dng_memory_data &buffer) const 1102 { 1103 1104 uint32 count = 0; 1105 1106 const char *sPtr = Get (); 1107 1108 while (*sPtr) 1109 { 1110 1111 uint32 x = DecodeUTF8 (sPtr); 1112 1113 if (x <= 0x0000FFFF || 1114 x > 0x0010FFFF) 1115 { 1116 1117 count = SafeUint32Add (count, 1); 1118 1119 } 1120 1121 else 1122 { 1123 1124 count = SafeUint32Add (count, 2); 1125 1126 } 1127 1128 } 1129 1130 const uint32 destBufferLength = SafeUint32Add (count, 1); 1131 buffer.Allocate (destBufferLength, sizeof (uint16)); 1132 1133 uint16 *dPtr = buffer.Buffer_uint16 (); 1134 uint16 * const destEnd = dPtr + destBufferLength; 1135 1136 sPtr = Get (); 1137 1138 while (*sPtr) 1139 { 1140 1141 uint32 x = DecodeUTF8 (sPtr); 1142 1143 if (x <= 0x0000FFFF) 1144 { 1145 1146 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1147 *(dPtr++) = (uint16) x; 1148 1149 } 1150 1151 else if (x > 0x0010FFFF) 1152 { 1153 1154 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1155 *(dPtr++) = (uint16) kREPLACEMENT_CHARACTER; 1156 1157 } 1158 1159 else 1160 { 1161 1162 x -= 0x00010000; 1163 1164 CheckSpaceLeftInBuffer (dPtr, destEnd, 2); 1165 *(dPtr++) = (uint16) ((x >> 10 ) + 0x0000D800); 1166 *(dPtr++) = (uint16) ((x & 0x000003FF) + 0x0000DC00); 1167 1168 } 1169 1170 } 1171 1172 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1173 *dPtr = 0; 1174 1175 return count; 1176 1177 } 1178 1179/*****************************************************************************/ 1180 1181void dng_string::Set_UTF16 (const uint16 *s) 1182 { 1183 1184 if (!s) 1185 { 1186 Clear (); 1187 return; 1188 } 1189 1190 bool swap = false; 1191 1192 if (s [0] == 0xFFFE) // Swapped byte order marker 1193 { 1194 swap = true; 1195 s++; 1196 } 1197 1198 else if (s [0] == 0xFEFF) // Non-swapped byte order marker 1199 { 1200 s++; 1201 } 1202 1203 uint32 length16 = 0; 1204 1205 while (s [length16] != 0) 1206 { 1207 length16 = SafeUint32Add (length16, 1); 1208 } 1209 1210 const uint16 *sEnd = s + length16; 1211 1212 const uint32 destBufferSize = 1213 SafeUint32Add (SafeUint32Mult (length16, 6), 1); 1214 dng_memory_data buffer (destBufferSize); 1215 1216 uint8 *d = buffer.Buffer_uint8 (); 1217 uint8 * const destEnd = d + destBufferSize; 1218 1219 while (s < sEnd) 1220 { 1221 1222 uint32 aChar = *s++; 1223 1224 if (swap) 1225 { 1226 aChar = ((aChar << 8) | (aChar >> 8)) & 0x0000FFFF; 1227 } 1228 1229 if ((aChar >= 0x0000D800) && (aChar <= 0x0000DBFF) && (s < sEnd)) 1230 { 1231 1232 uint32 aLow = *s; 1233 1234 if (swap) 1235 { 1236 aLow = ((aLow << 8) | (aLow >> 8)) & 0x0000FFFF; 1237 } 1238 1239 if ((aLow >= 0x0000DC00) && (aLow <= 0x0000DFFF)) 1240 { 1241 1242 aChar = ((aChar - 0x0000D800) << 10) + 1243 (aLow - 0x0000DC00) + 1244 0x00010000; 1245 1246 s++; 1247 1248 } 1249 1250 } 1251 1252 if (aChar > 0x7FFFFFFF) 1253 { 1254 aChar = kREPLACEMENT_CHARACTER; 1255 } 1256 1257 if (aChar < 0x00000080) 1258 { 1259 CheckSpaceLeftInBuffer (d, destEnd, 1); 1260 *(d++) = (uint8) aChar; 1261 } 1262 1263 else if (aChar < 0x00000800) 1264 { 1265 CheckSpaceLeftInBuffer (d, destEnd, 2); 1266 *(d++) = (uint8) ((aChar >> 6) | 0x000000C0); 1267 *(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080); 1268 } 1269 1270 else if (aChar < 0x00010000) 1271 { 1272 CheckSpaceLeftInBuffer (d, destEnd, 3); 1273 *(d++) = (uint8) ( (aChar >> 12) | 0x000000E0); 1274 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1275 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1276 } 1277 1278 else if (aChar < 0x00200000) 1279 { 1280 CheckSpaceLeftInBuffer (d, destEnd, 4); 1281 *(d++) = (uint8) ( (aChar >> 18) | 0x000000F0); 1282 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1283 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1284 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1285 } 1286 1287 else if (aChar < 0x04000000) 1288 { 1289 CheckSpaceLeftInBuffer (d, destEnd, 5); 1290 *(d++) = (uint8) ( (aChar >> 24) | 0x000000F8); 1291 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 1292 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1293 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1294 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1295 } 1296 1297 else 1298 { 1299 CheckSpaceLeftInBuffer (d, destEnd, 6); 1300 *(d++) = (uint8) ( (aChar >> 30) | 0x000000FC); 1301 *(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080); 1302 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 1303 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1304 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1305 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1306 } 1307 1308 } 1309 1310 CheckSpaceLeftInBuffer (d, destEnd, 1); 1311 *d = 0; 1312 1313 Set (buffer.Buffer_char ()); 1314 1315 } 1316 1317/*****************************************************************************/ 1318 1319void dng_string::Clear () 1320 { 1321 1322 Set (NULL); 1323 1324 } 1325 1326/*****************************************************************************/ 1327 1328void dng_string::Truncate (uint32 maxBytes) 1329 { 1330 1331 uint32 len = Length (); 1332 1333 if (len > maxBytes) 1334 { 1335 1336 uint8 *s = fData.Buffer_uint8 (); 1337 1338 // Don't truncate on an extension character. Extensions characters 1339 // in UTF-8 have the 0x80 bit set and the 0x40 bit clear. 1340 1341 while (maxBytes > 0 && ((s [maxBytes]) & 0xC0) == 0x80) 1342 { 1343 1344 maxBytes--; 1345 1346 } 1347 1348 s [maxBytes] = 0; 1349 1350 } 1351 1352 } 1353 1354/*****************************************************************************/ 1355 1356bool dng_string::TrimTrailingBlanks () 1357 { 1358 1359 bool didTrim = false; 1360 1361 if (fData.Buffer ()) 1362 { 1363 1364 char *s = fData.Buffer_char (); 1365 1366 uint32 len = strlenAsUint32 (s); 1367 1368 while (len > 0 && s [len - 1] == ' ') 1369 { 1370 len--; 1371 didTrim = true; 1372 } 1373 1374 s [len] = 0; 1375 1376 } 1377 1378 return didTrim; 1379 1380 } 1381 1382/*****************************************************************************/ 1383 1384bool dng_string::TrimLeadingBlanks () 1385 { 1386 1387 bool didTrim = false; 1388 1389 const char *s = Get (); 1390 1391 while (*s == ' ') 1392 { 1393 s++; 1394 didTrim = true; 1395 } 1396 1397 if (didTrim) 1398 { 1399 Set (s); 1400 } 1401 1402 return didTrim; 1403 1404 } 1405 1406/*****************************************************************************/ 1407 1408bool dng_string::IsEmpty () const 1409 { 1410 1411 const char *s = Get (); 1412 1413 return *s == 0; 1414 1415 } 1416 1417/*****************************************************************************/ 1418 1419uint32 dng_string::Length () const 1420 { 1421 1422 const char *s = Get (); 1423 1424 return strlenAsUint32 (s); 1425 1426 } 1427 1428/*****************************************************************************/ 1429 1430bool dng_string::operator== (const dng_string &s) const 1431 { 1432 1433 const char *s1 = Get (); 1434 const char *s2 = s.Get (); 1435 1436 return strcmp (s1, s2) == 0; 1437 1438 } 1439 1440/*****************************************************************************/ 1441 1442bool dng_string::Matches (const char *t, 1443 const char *s, 1444 bool case_sensitive) 1445 { 1446 1447 while (*s != 0) 1448 { 1449 1450 char c1 = *(s++); 1451 char c2 = *(t++); 1452 1453 if (!case_sensitive) 1454 { 1455 c1 = ForceUppercase (c1); 1456 c2 = ForceUppercase (c2); 1457 } 1458 1459 if (c1 != c2) 1460 { 1461 return false; 1462 } 1463 1464 } 1465 1466 return (*t == 0); 1467 1468 } 1469 1470/*****************************************************************************/ 1471 1472bool dng_string::Matches (const char *s, 1473 bool case_sensitive) const 1474 { 1475 1476 return dng_string::Matches (Get (), s, case_sensitive); 1477 1478 } 1479 1480/*****************************************************************************/ 1481 1482bool dng_string::StartsWith (const char *s, 1483 bool case_sensitive) const 1484 { 1485 1486 const char *t = Get (); 1487 1488 while (*s != 0) 1489 { 1490 1491 char c1 = *(s++); 1492 char c2 = *(t++); 1493 1494 if (!case_sensitive) 1495 { 1496 c1 = ForceUppercase (c1); 1497 c2 = ForceUppercase (c2); 1498 } 1499 1500 if (c1 != c2) 1501 { 1502 return false; 1503 } 1504 1505 } 1506 1507 return true; 1508 1509 } 1510 1511/*****************************************************************************/ 1512 1513bool dng_string::EndsWith (const char *s, 1514 bool case_sensitive) const 1515 { 1516 1517 uint32 len1 = Length (); 1518 1519 uint32 len2 = strlenAsUint32 (s); 1520 1521 if (len1 < len2) 1522 { 1523 return false; 1524 } 1525 1526 const char *t = Get () + (len1 - len2); 1527 1528 while (*s != 0) 1529 { 1530 1531 char c1 = *(s++); 1532 char c2 = *(t++); 1533 1534 if (!case_sensitive) 1535 { 1536 c1 = ForceUppercase (c1); 1537 c2 = ForceUppercase (c2); 1538 } 1539 1540 if (c1 != c2) 1541 { 1542 return false; 1543 } 1544 1545 } 1546 1547 return true; 1548 1549 } 1550 1551/*****************************************************************************/ 1552 1553bool dng_string::Contains (const char *s, 1554 bool case_sensitive, 1555 int32 *match_offset) const 1556 { 1557 1558 if (match_offset) 1559 { 1560 *match_offset = -1; 1561 } 1562 1563 uint32 len1 = Length (); 1564 1565 uint32 len2 = strlenAsUint32 (s); 1566 1567 if (len1 < len2) 1568 { 1569 return false; 1570 } 1571 1572 uint32 offsets = len1 - len2; 1573 1574 for (uint32 offset = 0; offset <= offsets; offset++) 1575 { 1576 1577 const char *ss = s; 1578 const char *tt = Get () + offset; 1579 1580 while (*ss != 0) 1581 { 1582 1583 char c1 = *(ss++); 1584 char c2 = *(tt++); 1585 1586 if (!case_sensitive) 1587 { 1588 c1 = ForceUppercase (c1); 1589 c2 = ForceUppercase (c2); 1590 } 1591 1592 if (c1 != c2) 1593 { 1594 goto tryNextOffset; 1595 } 1596 1597 } 1598 1599 if (match_offset) 1600 { 1601 *match_offset = offset; 1602 } 1603 1604 return true; 1605 1606 tryNextOffset: ; 1607 1608 } 1609 1610 return false; 1611 1612 } 1613 1614/*****************************************************************************/ 1615 1616bool dng_string::Replace (const char *old_string, 1617 const char *new_string, 1618 bool case_sensitive) 1619 { 1620 1621 int32 match_offset = -1; 1622 1623 if (Contains (old_string, 1624 case_sensitive, 1625 &match_offset)) 1626 { 1627 1628 uint32 len1 = Length (); 1629 1630 uint32 len2 = strlenAsUint32 (old_string); 1631 uint32 len3 = strlenAsUint32 (new_string); 1632 1633 if (len2 == len3) 1634 { 1635 1636 strncpy (fData.Buffer_char () + match_offset, 1637 new_string, 1638 len3); 1639 1640 } 1641 1642 else if (len2 > len3) 1643 { 1644 1645 strncpy (fData.Buffer_char () + match_offset, 1646 new_string, 1647 len3); 1648 1649 const char *s = fData.Buffer_char () + match_offset + len2; 1650 char *d = fData.Buffer_char () + match_offset + len3; 1651 1652 uint32 extra = len1 - match_offset - len2 + 1; // + 1 for NULL termination 1653 1654 for (uint32 j = 0; j < extra; j++) 1655 { 1656 *(d++) = *(s++); 1657 } 1658 1659 } 1660 1661 else 1662 { 1663 1664 // "len1 - len2" cannot wrap around because we know that if this 1665 // string contains old_string, len1 >= len2 must hold. 1666 dng_memory_data tempBuffer ( 1667 SafeUint32Add (SafeUint32Add (len1 - len2, len3), 1)); 1668 1669 if (match_offset) 1670 { 1671 1672 strncpy (tempBuffer.Buffer_char (), 1673 fData .Buffer_char (), 1674 match_offset); 1675 1676 } 1677 1678 if (len3) 1679 { 1680 1681 strncpy (tempBuffer.Buffer_char () + match_offset, 1682 new_string, 1683 len3); 1684 1685 } 1686 1687 uint32 extra = len1 - match_offset - len2 + 1; // + 1 for NULL termination 1688 1689 strncpy (tempBuffer.Buffer_char () + match_offset + len3, 1690 fData .Buffer_char () + match_offset + len2, 1691 extra); 1692 1693 Set (tempBuffer.Buffer_char ()); 1694 1695 } 1696 1697 return true; 1698 1699 } 1700 1701 return false; 1702 1703 } 1704 1705/*****************************************************************************/ 1706 1707bool dng_string::TrimLeading (const char *s, 1708 bool case_sensitive) 1709 { 1710 1711 if (StartsWith (s, case_sensitive)) 1712 { 1713 1714 Set (Get () + strlenAsUint32 (s)); 1715 1716 return true; 1717 1718 } 1719 1720 return false; 1721 1722 } 1723 1724/*****************************************************************************/ 1725 1726void dng_string::Append (const char *s) 1727 { 1728 1729 uint32 len2 = strlenAsUint32 (s); 1730 1731 if (len2) 1732 { 1733 1734 uint32 len1 = Length (); 1735 1736 dng_memory_data temp (SafeUint32Add (SafeUint32Add (len1, len2), 1)); 1737 1738 char *buffer = temp.Buffer_char (); 1739 1740 if (len1) 1741 { 1742 memcpy (buffer, Get (), len1); 1743 } 1744 1745 memcpy (buffer + len1, s, len2 + 1); 1746 1747 Set (buffer); 1748 1749 } 1750 1751 } 1752 1753/*****************************************************************************/ 1754 1755void dng_string::SetUppercase () 1756 { 1757 1758 if (fData.Buffer ()) 1759 { 1760 1761 uint32 len = Length (); 1762 1763 char *dPtr = fData.Buffer_char (); 1764 1765 for (uint32 j = 0; j < len; j++) 1766 { 1767 1768 char c = dPtr [j]; 1769 1770 if (c >= 'a' && c <= 'z') 1771 { 1772 1773 dPtr [j] = c - 'a' + 'A'; 1774 1775 } 1776 1777 } 1778 1779 } 1780 1781 } 1782 1783/*****************************************************************************/ 1784 1785void dng_string::SetLowercase () 1786 { 1787 1788 if (fData.Buffer ()) 1789 { 1790 1791 uint32 len = Length (); 1792 1793 char *dPtr = fData.Buffer_char (); 1794 1795 for (uint32 j = 0; j < len; j++) 1796 { 1797 1798 char c = dPtr [j]; 1799 1800 if (c >= 'A' && c <= 'Z') 1801 { 1802 1803 dPtr [j] = c - 'A' + 'a'; 1804 1805 } 1806 1807 } 1808 1809 } 1810 1811 } 1812 1813/*****************************************************************************/ 1814 1815void dng_string::SetLineEndings (char ending) 1816 { 1817 1818 if (fData.Buffer ()) 1819 { 1820 1821 const char *sPtr = fData.Buffer_char (); 1822 char *dPtr = fData.Buffer_char (); 1823 1824 while (*sPtr) 1825 { 1826 1827 char c = *(sPtr++); 1828 1829 char nc = sPtr [0]; 1830 1831 if ((c == '\r' && nc == '\n') || 1832 (c == '\n' && nc == '\r')) 1833 { 1834 1835 sPtr++; 1836 1837 if (ending) 1838 { 1839 *(dPtr++) = ending; 1840 } 1841 1842 } 1843 1844 else if (c == '\n' || 1845 c == '\r') 1846 { 1847 1848 if (ending) 1849 { 1850 *(dPtr++) = ending; 1851 } 1852 1853 } 1854 1855 else 1856 { 1857 1858 *(dPtr++) = c; 1859 1860 } 1861 1862 } 1863 1864 *dPtr = 0; 1865 1866 } 1867 1868 } 1869 1870/*****************************************************************************/ 1871 1872void dng_string::StripLowASCII () 1873 { 1874 1875 if (fData.Buffer ()) 1876 { 1877 1878 const char *sPtr = fData.Buffer_char (); 1879 char *dPtr = fData.Buffer_char (); 1880 1881 while (*sPtr) 1882 { 1883 1884 char c = *(sPtr++); 1885 1886 if (c == '\r' || c == '\n' || (uint8) c >= ' ') 1887 { 1888 1889 *(dPtr++) = c; 1890 1891 } 1892 1893 } 1894 1895 *dPtr = 0; 1896 1897 } 1898 1899 } 1900 1901/*****************************************************************************/ 1902 1903void dng_string::NormalizeAsCommaSeparatedNumbers () 1904 { 1905 1906 if (fData.Buffer ()) 1907 { 1908 1909 const char *sPtr = fData.Buffer_char (); 1910 char *dPtr = fData.Buffer_char (); 1911 1912 bool commaInserted = false; 1913 1914 while (*sPtr) 1915 { 1916 1917 uint32 c = DecodeUTF8 (sPtr); 1918 1919 // Support number formats such as "3", "+3.0", "-3.1416", "314.16e-2", 1920 // "0.31416E1", but no hex/octal number representations. 1921 1922 if (isdigit ((int) c) || c == '.' || c == '-' || c == '+' || c == 'e' || c == 'E') 1923 { 1924 1925 *(dPtr++) = (char) c; 1926 1927 if (commaInserted) 1928 { 1929 1930 commaInserted = false; 1931 1932 } 1933 1934 } 1935 1936 else if (!commaInserted) 1937 { 1938 1939 *(dPtr++) = ','; 1940 1941 commaInserted = true; 1942 1943 } 1944 1945 } 1946 1947 *dPtr = 0; 1948 1949 } 1950 1951 } 1952 1953/******************************************************************************/ 1954 1955// Unicode to low-ASCII strings table. 1956 1957struct UnicodeToLowASCIIEntry 1958 { 1959 uint32 unicode; 1960 const char *ascii; 1961 }; 1962 1963static const UnicodeToLowASCIIEntry kUnicodeToLowASCII [] = 1964 { 1965 { 0x00A0, " " }, 1966 { 0x00A1, "!" }, 1967 { 0x00A9, "(C)" }, 1968 { 0x00AA, "a" }, 1969 { 0x00AB, "<<" }, 1970 { 0x00AC, "!" }, 1971 { 0x00AE, "(R)" }, 1972 { 0x00B0, "dg" }, 1973 { 0x00B1, "+-" }, 1974 { 0x00B7, "." }, 1975 { 0x00BA, "o" }, 1976 { 0x00BB, ">>" }, 1977 { 0x00BF, "?" }, 1978 { 0x00C0, "A" }, 1979 { 0x00C1, "A" }, 1980 { 0x00C2, "A" }, 1981 { 0x00C3, "A" }, 1982 { 0x00C4, "A" }, 1983 { 0x00C5, "A" }, 1984 { 0x00C6, "AE" }, 1985 { 0x00C7, "C" }, 1986 { 0x00C8, "E" }, 1987 { 0x00C9, "E" }, 1988 { 0x00CA, "E" }, 1989 { 0x00CB, "E" }, 1990 { 0x00CC, "I" }, 1991 { 0x00CD, "I" }, 1992 { 0x00CE, "I" }, 1993 { 0x00CF, "I" }, 1994 { 0x00D1, "N" }, 1995 { 0x00D2, "O" }, 1996 { 0x00D3, "O" }, 1997 { 0x00D4, "O" }, 1998 { 0x00D5, "O" }, 1999 { 0x00D6, "O" }, 2000 { 0x00D8, "O" }, 2001 { 0x00D9, "U" }, 2002 { 0x00DA, "U" }, 2003 { 0x00DB, "U" }, 2004 { 0x00DC, "U" }, 2005 { 0x00DD, "Y" }, 2006 { 0x00E0, "a" }, 2007 { 0x00E1, "a" }, 2008 { 0x00E2, "a" }, 2009 { 0x00E3, "a" }, 2010 { 0x00E4, "a" }, 2011 { 0x00E5, "a" }, 2012 { 0x00E6, "ae" }, 2013 { 0x00E7, "c" }, 2014 { 0x00E8, "e" }, 2015 { 0x00E9, "e" }, 2016 { 0x00EA, "e" }, 2017 { 0x00EB, "e" }, 2018 { 0x00EC, "i" }, 2019 { 0x00ED, "i" }, 2020 { 0x00EE, "i" }, 2021 { 0x00EF, "i" }, 2022 { 0x00F1, "n" }, 2023 { 0x00F2, "o" }, 2024 { 0x00F3, "o" }, 2025 { 0x00F4, "o" }, 2026 { 0x00F5, "o" }, 2027 { 0x00F6, "o" }, 2028 { 0x00F7, "/" }, 2029 { 0x00F8, "o" }, 2030 { 0x00F9, "u" }, 2031 { 0x00FA, "u" }, 2032 { 0x00FB, "u" }, 2033 { 0x00FC, "u" }, 2034 { 0x00FD, "y" }, 2035 { 0x00FF, "y" }, 2036 { 0x0131, "i" }, 2037 { 0x0152, "OE" }, 2038 { 0x0153, "oe" }, 2039 { 0x0178, "Y" }, 2040 { 0x2013, "-" }, 2041 { 0x2014, "-" }, 2042 { 0x2018, "'" }, 2043 { 0x2019, "'" }, 2044 { 0x201A, "," }, 2045 { 0x201C, "\"" }, 2046 { 0x201D, "\"" }, 2047 { 0x201E, ",," }, 2048 { 0x2022, "." }, 2049 { 0x2026, "..." }, 2050 { 0x2039, "<" }, 2051 { 0x203A, ">" }, 2052 { 0x2044, "/" }, 2053 { 0x2122, "TM" }, 2054 { 0x2206, "d" }, 2055 { 0x2211, "S" }, 2056 { 0x2260, "!=" }, 2057 { 0x2264, "<=" }, 2058 { 0x2265, ">=" }, 2059 { 0x2318, "#" }, 2060 { 0xFB01, "fi" }, 2061 { 0xFB02, "fl" } 2062 }; 2063 2064/******************************************************************************/ 2065 2066void dng_string::ForceASCII () 2067 { 2068 2069 if (!IsASCII ()) 2070 { 2071 2072 uint32 tempBufferSize = 2073 SafeUint32Add (SafeUint32Mult(Length(), 3), 1); 2074 dng_memory_data tempBuffer (tempBufferSize); 2075 2076 char *dPtr = tempBuffer.Buffer_char (); 2077 char * const destEnd = dPtr + tempBufferSize; 2078 2079 const char *sPtr = Get (); 2080 2081 while (*sPtr) 2082 { 2083 2084 uint32 x = DecodeUTF8 (sPtr); 2085 2086 if (x <= 0x007F) 2087 { 2088 2089 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2090 *(dPtr++) = (char) x; 2091 2092 } 2093 2094 else 2095 { 2096 2097 const char *ascii = NULL; 2098 2099 const uint32 kTableEntrys = sizeof (kUnicodeToLowASCII ) / 2100 sizeof (kUnicodeToLowASCII [0]); 2101 2102 for (uint32 entry = 0; entry < kTableEntrys; entry++) 2103 { 2104 2105 if (kUnicodeToLowASCII [entry] . unicode == x) 2106 { 2107 2108 ascii = kUnicodeToLowASCII [entry] . ascii; 2109 2110 break; 2111 2112 } 2113 2114 } 2115 2116 if (ascii) 2117 { 2118 2119 while (*ascii) 2120 { 2121 2122 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2123 *(dPtr++) = *(ascii++); 2124 2125 } 2126 2127 } 2128 2129 else 2130 { 2131 2132 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2133 *(dPtr++) ='?'; 2134 2135 } 2136 2137 } 2138 2139 } 2140 2141 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2142 *dPtr = 0; 2143 2144 Set (tempBuffer.Buffer_char ()); 2145 2146 } 2147 2148 } 2149 2150/******************************************************************************/ 2151 2152static dng_mutex gProtectUCCalls ("gProtectUCCalls"); 2153 2154/******************************************************************************/ 2155 2156int32 dng_string::Compare (const dng_string &s) const 2157 { 2158 2159 #if qMacOS 2160 #if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 2161 2162 // TODO: Needs implementation. 2163 ThrowProgramError ("Compare() not implemented on iOS"); 2164 return 0; 2165 2166 #else 2167 2168 { 2169 2170 dng_memory_data aStrA; 2171 dng_memory_data aStrB; 2172 2173 uint32 aLenA = this->Get_UTF16 (aStrA); 2174 uint32 aLenB = s .Get_UTF16 (aStrB); 2175 2176 if (aLenA > 0) 2177 { 2178 2179 if (aLenB > 0) 2180 { 2181 2182 // For some Mac OS versions anyway, UCCompareTextDefault is not 2183 // thread safe. 2184 2185 dng_lock_mutex lockMutex (&gProtectUCCalls); 2186 2187 UCCollateOptions aOptions = kUCCollateStandardOptions | 2188 kUCCollatePunctuationSignificantMask; 2189 2190 SInt32 aOrder = -1; 2191 2192 Boolean aEqual = false; 2193 2194 OSStatus searchStatus = ::UCCompareTextDefault (aOptions, 2195 aStrA.Buffer_uint16 (), 2196 aLenA, 2197 aStrB.Buffer_uint16 (), 2198 aLenB, 2199 &aEqual, 2200 &aOrder); 2201 2202 if (searchStatus == noErr) 2203 { 2204 2205 if (aEqual || (aOrder == 0)) 2206 { 2207 return 0; 2208 } 2209 2210 else 2211 { 2212 return (aOrder > 0) ? 1 : -1; 2213 } 2214 2215 } 2216 2217 else 2218 { 2219 2220 DNG_REPORT ("UCCompareTextDefault failed"); 2221 2222 return -1; 2223 2224 } 2225 2226 } 2227 2228 else 2229 { 2230 return 1; 2231 } 2232 2233 } 2234 2235 else 2236 { 2237 2238 if (aLenB > 0) 2239 { 2240 return -1; 2241 } 2242 2243 else 2244 { 2245 return 0; 2246 } 2247 2248 } 2249 2250 } 2251 2252 #endif // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR 2253 2254 #elif qWinOS 2255 2256 { 2257 2258 dng_memory_data aStrA; 2259 dng_memory_data aStrB; 2260 2261 uint32 aLenA = this->Get_UTF16 (aStrA); 2262 uint32 aLenB = s .Get_UTF16 (aStrB); 2263 2264 if (aLenA > 0) 2265 { 2266 2267 if (aLenB > 0) 2268 { 2269 2270 LCID locale = LOCALE_SYSTEM_DEFAULT; 2271 2272 DWORD aFlags = NORM_IGNOREWIDTH; 2273 2274 int aOrder = ::CompareStringW (locale, 2275 aFlags, 2276 (const WCHAR *) aStrA.Buffer_uint16 (), 2277 aLenA, 2278 (const WCHAR *) aStrB.Buffer_uint16 (), 2279 aLenB); 2280 2281 if (aOrder == CSTR_EQUAL) 2282 { 2283 return 0; 2284 } 2285 2286 else if (aOrder == CSTR_GREATER_THAN) 2287 { 2288 return 1; 2289 } 2290 2291 else 2292 { 2293 return -1; 2294 } 2295 2296 } 2297 2298 else 2299 { 2300 return 1; 2301 } 2302 2303 } 2304 2305 else 2306 { 2307 2308 if (aLenB > 0) 2309 { 2310 return -1; 2311 } 2312 else 2313 { 2314 return 0; 2315 } 2316 2317 } 2318 2319 } 2320 2321 #else 2322 2323 // Fallback to a pure Unicode sort order. 2324 2325 { 2326 2327 for (uint32 pass = 0; pass < 2; pass++) 2328 { 2329 2330 const char *aPtr = Get (); 2331 const char *bPtr = s.Get (); 2332 2333 while (*aPtr || *bPtr) 2334 { 2335 2336 if (!bPtr) 2337 { 2338 return 1; 2339 } 2340 2341 else if (!aPtr) 2342 { 2343 return -1; 2344 } 2345 2346 uint32 a = DecodeUTF8 (aPtr); 2347 uint32 b = DecodeUTF8 (bPtr); 2348 2349 // Ignore case on first compare pass. 2350 2351 if (pass == 0) 2352 { 2353 2354 if (a >= (uint32) 'a' && a <= (uint32) 'z') 2355 { 2356 a = a - (uint32) 'a' + (uint32) 'A'; 2357 } 2358 2359 if (b >= (uint32) 'a' && b <= (uint32) 'z') 2360 { 2361 b = b - (uint32) 'a' + (uint32) 'A'; 2362 } 2363 2364 } 2365 2366 if (b > a) 2367 { 2368 return 1; 2369 } 2370 2371 else if (a < b) 2372 { 2373 return -1; 2374 } 2375 2376 } 2377 2378 } 2379 2380 } 2381 2382 #endif 2383 2384 return 0; 2385 2386 } 2387 2388/*****************************************************************************/ 2389