dng_string.cpp revision 29c7498fabe2e3c87a85b487bfe9d783c401e1f0
1/*****************************************************************************/
2// Copyright 2006-2007 Adobe Systems Incorporated
3// All Rights Reserved.
4//
5// NOTICE:  Adobe permits you to use, modify, and distribute this file in
6// accordance with the terms of the Adobe license agreement accompanying it.
7/*****************************************************************************/
8
9/* $Id: //mondo/dng_sdk_1_4/dng_sdk/source/dng_string.cpp#2 $ */
10/* $DateTime: 2012/07/31 22:04:34 $ */
11/* $Change: 840853 $ */
12/* $Author: tknoll $ */
13
14/*****************************************************************************/
15
16#include "dng_string.h"
17
18#include "dng_assertions.h"
19#include "dng_exceptions.h"
20#include "dng_flags.h"
21#include "dng_mutex.h"
22#include "dng_utils.h"
23#include "dng_safe_arithmetic.h"
24
25#if qMacOS
26#include <TargetConditionals.h>
27#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
28#include <MobileCoreServices/MobileCoreServices.h>
29#else
30#include <CoreServices/CoreServices.h>
31#endif  // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
32#endif  // qMacOS
33
34#if qWinOS
35#include <windows.h>
36#endif
37
38#if qiPhone || qAndroid || qLinux
39#include <ctype.h> // for isdigit
40#endif
41
42/*****************************************************************************/
43
44const uint32 kREPLACEMENT_CHARACTER	= 0x0000FFFD;
45
46/*****************************************************************************/
47
48// Returns the length of the zero-terminated string 's'. Throws a dng_exception
49// if the length of 's' is too large to be represented as a uint32_t.
50static uint32 strlenAsUint32(const char *s)
51	{
52
53	uint32 lengthAsUint32 = 0;
54	ConvertUnsigned(strlen(s), &lengthAsUint32);
55
56	return lengthAsUint32;
57
58	}
59
// Checks whether there is enough space left in the buffer pointed to by
// 'currentPos' to write at least 'space' elements of type T (to positions
// currentPos[0] through currentPos[space - 1]). Throws a dng_exception if
// there is not enough space left in the buffer.
// 'bufferEnd' should point one element beyond the end of the buffer. For
// example, if the buffer is "T buffer[3];", then bufferEnd should point to
// buffer + 3.
67template <class T>
68static void CheckSpaceLeftInBuffer(const T *currentPos,
69								   const T *bufferEnd,
70								   size_t space)
71	{
72
73	if (bufferEnd < currentPos || static_cast<size_t>(bufferEnd - currentPos) < space)
74		{
75		ThrowMemoryFull ("Buffer overrun");
76		}
77
78	}
79
80/*****************************************************************************/
81
82// Throws an exception to notify the user of code that has not been security
83// hardened and prevent execution of that code.
84//
85// Though the DNG SDK in general has been security-hardened, this does not apply
86// to the following Mac-OS- and Windows-specific functions. Calls to
87// ThrowNotHardened() have been added to these functions to alert callers of
88// this fact.
89static void ThrowNotHardened()
90	{
91	ThrowProgramError ("This function has not been security-hardened");
92	}
93
94#if qMacOS
95#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
96
97static uint32 Extract_SystemEncoding (const dng_string &dngString,
98							   		  dng_memory_data &buffer)
99	{
100		ThrowProgramError ("Extract_SystemEncoding() not implemented on iOS");
101		return 0;
102	}
103
104static void Assign_SystemEncoding (dng_string &dngString,
105							       const char *otherString)
106	{
107		ThrowProgramError ("Assign_SystemEncoding() not implemented on iOS");
108
109	}
110
111static void Assign_JIS_X208_1990 (dng_string &dngString,
112							      const char *otherString)
113	{
114		ThrowProgramError ("Assign_JIS_X208_1990() not implemented on iOS");
115	}
116
117#else
118
119static void Assign_Multibyte (dng_string &dngString,
120							  const char *otherString,
121							  TextEncoding encoding)
122	{
123
124	// This function contains security-vulnerable code. Do not use.
125	// The particular vulnerabilities are:
126	// - Casting the result of strlen() to a uint32 may case truncation. (Use
127	//   strlenAsUint32() instead.)
128	// - The computation of aBufSize and the subsequent addition of 1 in the
129	//   call to the dng_memory_data constructor may wrap around.
130	ThrowNotHardened();
131
132	uint32 aSize = (uint32) strlen (otherString);
133
134	if (aSize > 0)
135		{
136
137		uint32 aBufSize = aSize * 6 + 256;
138
139		dng_memory_data aBuf (aBufSize + 1);
140
141		UnicodeMapping aMapping;
142
143		aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0,
144														 kUnicodeNoSubset,
145														 kUnicodeUTF8Format);
146
147		aMapping.otherEncoding   = encoding;
148		aMapping.mappingVersion  = kUnicodeUseLatestMapping;
149
150		TextToUnicodeInfo aInfo = NULL;
151
152		if (::CreateTextToUnicodeInfo (&aMapping, &aInfo) == noErr)
153			{
154
155			ByteCount aInput  = 0;
156			ByteCount aOutput = 0;
157
158			::ConvertFromTextToUnicode (aInfo,
159										aSize,
160									    otherString,
161									    kUnicodeUseFallbacksMask |
162									    kUnicodeLooseMappingsMask,
163									    0,
164									    NULL,
165									    NULL,
166									    NULL,
167									    aBufSize,
168									    &aInput,
169									    &aOutput,
170									    (UniChar *) aBuf.Buffer ());
171
172			::DisposeTextToUnicodeInfo (&aInfo);
173
174			if (aOutput > 0 && aOutput <= aBufSize)
175				{
176
177				char *aBufChar = aBuf.Buffer_char ();
178
179				aBufChar [aOutput] = 0;
180
181				dngString.Set (aBufChar);
182
183				return;
184
185				}
186
187			}
188
189		}
190
191	dngString.Clear ();
192
193	}
194
195static uint32 Extract_Multibyte (const dng_string &dngString,
196							     dng_memory_data &buffer,
197							     TextEncoding encoding)
198	{
199
200	// This function contains security-vulnerable code. Do not use.
201	// The particular vulnerabilities are:
202	// - The computation of aBufSize may wrap around.
203	// - The computation of the argument to buffer.Allocate() may overflow; the
204	//   conversion to uint32 is also problematic.
205	// - The signed-to-unsigned conversion in the return statement "
206	//   return (uint32) aOutput;" may be problematic.
207	ThrowNotHardened();
208
209	uint32 aSize = dngString.Length ();
210
211	if (aSize > 0)
212		{
213
214		uint32 aBufSize = (aSize * 2) + 256;
215
216		dng_memory_data tempBuffer (aBufSize);
217
218		UnicodeMapping aMapping;
219
220		aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0,
221														 kUnicodeNoSubset,
222														 kUnicodeUTF8Format);
223
224		aMapping.otherEncoding   = encoding;
225		aMapping.mappingVersion  = kUnicodeUseLatestMapping;
226
227		UnicodeToTextInfo aInfo = NULL;
228
229		if (::CreateUnicodeToTextInfo (&aMapping, &aInfo) == noErr)
230			{
231
232			ByteCount aInput  = 0;
233			ByteCount aOutput = 0;
234
235			::ConvertFromUnicodeToText (aInfo,
236										aSize,
237										(const UniChar *) dngString.Get (),
238									    kUnicodeUseFallbacksMask  |
239									    kUnicodeLooseMappingsMask |
240									    kUnicodeDefaultDirectionMask,
241									    0,
242									    NULL,
243									    NULL,
244									    NULL,
245									    aBufSize,
246									    &aInput,
247									    &aOutput,
248									    tempBuffer.Buffer_char ());
249
250			::DisposeUnicodeToTextInfo (&aInfo);
251
252			if (aOutput > 0)
253				{
254
255				buffer.Allocate ((uint32) (aOutput + 1));
256
257				memcpy (buffer.Buffer (),
258						tempBuffer.Buffer (),
259						aOutput);
260
261				buffer.Buffer_char () [aOutput] = 0;
262
263				return (uint32) aOutput;
264
265				}
266
267			}
268
269		}
270
271	buffer.Allocate (1);
272
273	buffer.Buffer_char () [0] = 0;
274
275	return 0;
276
277	}
278
279static void Assign_SystemEncoding (dng_string &dngString,
280							       const char *otherString)
281	{
282
283	TextEncoding aEncoding;
284
285	::UpgradeScriptInfoToTextEncoding (smSystemScript,
286									   kTextLanguageDontCare,
287									   kTextRegionDontCare,
288									   NULL,
289									   &aEncoding);
290
291	Assign_Multibyte (dngString,
292					  otherString,
293					  aEncoding);
294
295	}
296
297static uint32 Extract_SystemEncoding (const dng_string &dngString,
298							   		  dng_memory_data &buffer)
299	{
300
301	TextEncoding aEncoding;
302
303	::UpgradeScriptInfoToTextEncoding (smSystemScript,
304									   kTextLanguageDontCare,
305									   kTextRegionDontCare,
306									   NULL,
307									   &aEncoding);
308
309	return Extract_Multibyte (dngString,
310					   		  buffer,
311					   		  aEncoding);
312
313	}
314
315static void Assign_JIS_X208_1990 (dng_string &dngString,
316							      const char *otherString)
317	{
318
319	Assign_Multibyte (dngString,
320					  otherString,
321					  kTextEncodingJIS_X0208_90);
322
323	}
324
325#endif  // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
326#endif  // qMacOS
327
328/*****************************************************************************/
329
330#if qWinOS
331
332static void Assign_Multibyte (dng_string &dngString,
333							  const char *otherString,
334							  UINT encoding)
335	{
336
337	// This function contains security-vulnerable code. Do not use.
338	// The particular vulnerabilities are:
339	// - Converting the return value of strlen() to int may cause overflow.
340	// - The computation of aBufChars and of the argument to the dng_memory_data
341	//   constructor may overflow. Additionally, there is an implicit
342	//   signed-to-unsigned conversion in the call to the dng_memory_data
343	//   constructor.
344	ThrowNotHardened();
345
346	DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes");
347
348	int aSize = (int) strlen (otherString);
349
350	if (aSize > 0)
351		{
352
353		int aBufChars = aSize * 3 + 128;
354
355		dng_memory_data aBuf ((aBufChars + 1) << 1);
356
357		int aResult = ::MultiByteToWideChar (encoding,
358											 0,
359											 otherString,
360											 aSize,
361											 (WCHAR *) aBuf.Buffer (),
362											 aBufChars);
363
364		if (aResult > 0 && aResult <= aBufChars)
365			{
366
367			uint16 * aUTF16 = aBuf.Buffer_uint16 ();
368
369			aUTF16 [aResult] = 0;
370
371			dngString.Set_UTF16 (aUTF16);
372
373			return;
374
375			}
376
377		}
378
379	dngString.Clear ();
380
381	}
382
383static uint32 Extract_Multibyte (const dng_string &dngString,
384							     dng_memory_data &buffer,
385							     UINT encoding)
386	{
387
388	// This function contains security-vulnerable code. Do not use.
389	// The particular vulnerabilities are:
390	// - Converting the return value of dngString.Get_UTF16() may cause
391	//   overflow.
392	// - The computation of dBufSize may overflow.
393	// - The calls to the dng_memory_data constructor and to buffer.Allocate()
394	//   trigger implicit conversions of int to uint32 that may be problematic.
395	// - The memcpy() call triggers an implicit conversion of aResult to a
396	//   size_t, which may be problematic.
397	// - The conversion of aResult to a uint32 in the return statement may be
398	//   problematic.
399	ThrowNotHardened();
400
401	DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes");
402
403	dng_memory_data sBuffer;
404
405	int aCount = dngString.Get_UTF16 (sBuffer);
406
407	int dBufSize = aCount * 2 + 256;
408
409	dng_memory_data dBuffer (dBufSize);
410
411	int aResult = ::WideCharToMultiByte (encoding,
412										 0,
413										 (WCHAR *) sBuffer.Buffer (),
414										 aCount,
415										 dBuffer.Buffer_char (),
416										 dBufSize,
417										 NULL,
418										 NULL);
419
420	if (aResult < 0)
421		aResult = 0;
422
423	buffer.Allocate (aResult + 1);
424
425	memcpy (buffer.Buffer (),
426			dBuffer.Buffer (),
427			aResult);
428
429	buffer.Buffer_char () [aResult] = 0;
430
431	return (uint32) aResult;
432
433	}
434
435static void Assign_SystemEncoding (dng_string &dngString,
436							       const char *otherString)
437	{
438
439	Assign_Multibyte (dngString,
440					  otherString,
441					  ::GetACP ());
442
443	}
444
445static uint32 Extract_SystemEncoding (const dng_string &dngString,
446							   		  dng_memory_data &buffer)
447	{
448
449	return Extract_Multibyte (dngString,
450					   		  buffer,
451					   		  ::GetACP ());
452
453	}
454
455static void Assign_JIS_X208_1990 (dng_string &dngString,
456							      const char *otherString)
457	{
458
459	// From MSDN documentation: 20932 = JIS X 0208-1990 & 0121-1990
460
461	const UINT kJIS = 20932;
462
463	Assign_Multibyte (dngString,
464					  otherString,
465					  kJIS);
466
467	}
468
469#endif
470
471/*****************************************************************************/
472
static bool IsASCII (const char *s)
	{

	// Reports whether every byte of the zero-terminated string 's' has its
	// high bit clear. A NULL pointer and the empty string both count as
	// ASCII.

	if (!s)
		{
		return true;
		}

	for (const char *cursor = s; *cursor != 0; ++cursor)
		{

		// Inspect the byte as unsigned so the 0x80 test does not depend on
		// whether plain char is signed.

		const unsigned char byteValue = static_cast<unsigned char> (*cursor);

		if ((byteValue & 0x80) != 0)
			{
			return false;
			}

		}

	return true;

	}
507
508/*****************************************************************************/
509
510dng_string::dng_string ()
511
512	:	fData ()
513
514	{
515
516	}
517
518/*****************************************************************************/
519
520dng_string::dng_string (const dng_string &s)
521
522	:	fData ()
523
524	{
525
526	Set (s.Get ());
527
528	}
529
530/*****************************************************************************/
531
532dng_string & dng_string::operator= (const dng_string &s)
533	{
534
535	if (this != &s)
536		{
537
538		Set (s.Get ());
539
540		}
541
542	return *this;
543
544	}
545
546/*****************************************************************************/
547
548dng_string::~dng_string ()
549	{
550
551	}
552
553/*****************************************************************************/
554
555const char * dng_string::Get () const
556	{
557
558	if (fData.Buffer ())
559		{
560
561		return fData.Buffer_char ();
562
563		}
564
565	return "";
566
567	}
568
569/*****************************************************************************/
570
571bool dng_string::IsASCII () const
572	{
573
574	return ::IsASCII (Get ());
575
576	}
577
578/*****************************************************************************/
579
580void dng_string::Set (const char *s)
581	{
582
583	// Measure the new length.
584
585	uint32 newLen = (s != NULL ? strlenAsUint32 (s) : 0);
586
587	// If it is a NULL string, then clear the buffer.
588
589	if (newLen == 0)
590		{
591
592		fData.Clear ();
593
594		}
595
596	// Else we need to copy the bytes.
597
598	else
599		{
600
601		uint32 oldLen = Length ();
602
603		// We might be setting this string to a sub-string of itself,
604		// so don't reallocate the data unless the string is getting
605		// longer.
606
607		if (newLen > oldLen)
608			{
609
610			fData.Clear ();
611
612			fData.Allocate (SafeUint32Add (newLen, 1));
613
614			}
615
616		char *d = fData.Buffer_char ();
617
618		for (uint32 k = 0; k <= newLen; k++)
619			{
620
621			d [k] = s [k];
622
623			}
624
625		}
626
627	}
628
629/*****************************************************************************/
630
631void dng_string::Set_ASCII (const char *s)
632	{
633
634	if (::IsASCII (s))
635		{
636
637		Set (s);
638
639		}
640
641	else
642		{
643
644		Set_SystemEncoding (s);
645
646		}
647
648	}
649
650/*****************************************************************************/
651
652void dng_string::Set_UTF8 (const char *s)
653	{
654
655	uint32 len = strlenAsUint32 (s);
656
657	const char *sEnd = s + len;
658
659	// Worst case expansion is 1-byte characters expanding to
660	// replacement character, which requires 3 bytes.
661
662	const uint32 destBufferLength = SafeUint32Add (SafeUint32Mult (len, 3), 1);
663	dng_memory_data buffer (destBufferLength);
664
665	uint8 *d = buffer.Buffer_uint8 ();
666	uint8 * const destEnd = d + destBufferLength;
667
668	while (s < sEnd)
669		{
670
671		uint32 aChar = DecodeUTF8 (s, (uint32) (sEnd - s));
672
673		if (aChar > 0x7FFFFFFF)
674			{
675			aChar = kREPLACEMENT_CHARACTER;
676			}
677
678		#if qDNGValidate
679
680		if (aChar == kREPLACEMENT_CHARACTER)
681			{
682			ReportWarning ("Expected UTF-8 value is not valid UTF-8 (or contains a kREPLACEMENT_CHARACTER)");
683			}
684
685		#endif
686
687		if (aChar < 0x00000080)
688			{
689			CheckSpaceLeftInBuffer (d, destEnd, 1);
690			*(d++) = (uint8) aChar;
691			}
692
693		else if (aChar < 0x00000800)
694			{
695			CheckSpaceLeftInBuffer (d, destEnd, 2);
696			*(d++) = (uint8) ((aChar >> 6) | 0x000000C0);
697			*(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080);
698			}
699
700		else if (aChar < 0x00010000)
701			{
702			CheckSpaceLeftInBuffer (d, destEnd, 3);
703			*(d++) = (uint8) ( (aChar >> 12) | 0x000000E0);
704			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
705			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
706			}
707
708		else if (aChar < 0x00200000)
709			{
710			CheckSpaceLeftInBuffer (d, destEnd, 4);
711			*(d++) = (uint8) ( (aChar >> 18) | 0x000000F0);
712			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
713			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
714			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
715			}
716
717		else if (aChar < 0x04000000)
718			{
719			CheckSpaceLeftInBuffer (d, destEnd, 5);
720			*(d++) = (uint8) ( (aChar >> 24) | 0x000000F8);
721			*(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
722			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
723			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
724			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
725			}
726
727		else
728			{
729			CheckSpaceLeftInBuffer (d, destEnd, 6);
730			*(d++) = (uint8) ( (aChar >> 30) | 0x000000FC);
731			*(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080);
732			*(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
733			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
734			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
735			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
736			}
737
738		}
739
740	CheckSpaceLeftInBuffer (d, destEnd, 1);
741	*d = 0;
742
743	Set (buffer.Buffer_char ());
744
745	}
746
747/*****************************************************************************/
748
749uint32 dng_string::Get_SystemEncoding (dng_memory_data &buffer) const
750	{
751
752	if (IsASCII ())
753		{
754
755		uint32 len = Length ();
756
757		const uint32 destBufferLength = SafeUint32Add (len, 1);
758		buffer.Allocate (destBufferLength);
759
760		memcpy (buffer.Buffer (), Get (), destBufferLength);
761
762		return len;
763
764		}
765
766	else
767		{
768
769		#if qMacOS || qWinOS
770
771		return Extract_SystemEncoding (*this, buffer);
772
773		#else
774
775		// Fallback logic to force the string to ASCII.
776
777		dng_string temp (*this);
778
779		temp.ForceASCII ();
780
781		return temp.Get_SystemEncoding (buffer);
782
783		#endif
784
785		}
786
787	}
788
789/*****************************************************************************/
790
791void dng_string::Set_SystemEncoding (const char *s)
792	{
793
794	if (::IsASCII (s))
795		{
796
797		Set (s);
798
799		}
800
801	else
802		{
803
804		#if qMacOS || qWinOS
805
806		Assign_SystemEncoding (*this, s);
807
808		#else
809
810		// Fallback logic that just grabs the ASCII characters and
811		// ignores the non-ASCII characters.
812
813		uint32 len = strlenAsUint32 (s);
814
815		const uint32 destBufferLength = SafeUint32Add (len, 1);
816		dng_memory_data buffer (destBufferLength);
817
818		uint8 *d = buffer.Buffer_uint8 ();
819		uint8 * const destEnd = d + destBufferLength;
820
821		while (*s)
822			{
823
824			uint8 c = (uint8) *(s++);
825
826			if ((c & 0x80) == 0)
827				{
828
829				CheckSpaceLeftInBuffer (d, destEnd, 1);
830				*(d++) = c;
831
832				}
833
834			}
835
836		CheckSpaceLeftInBuffer (d, destEnd, 1);
837		*d = 0;
838
839		Set (buffer.Buffer_char ());
840
841		#endif
842
843		}
844
845	}
846
847/*****************************************************************************/
848
849bool dng_string::ValidSystemEncoding () const
850	{
851
852	if (IsASCII ())
853		{
854
855		return true;
856
857		}
858
859	dng_memory_data buffer;
860
861	Get_SystemEncoding (buffer);
862
863	dng_string temp;
864
865	temp.Set_SystemEncoding (buffer.Buffer_char ());
866
867	return (*this == temp);
868
869	}
870
871/*****************************************************************************/
872
873void dng_string::Set_JIS_X208_1990 (const char *s)
874	{
875
876	if (::IsASCII (s))
877		{
878
879		Set (s);
880
881		}
882
883	else
884		{
885
886		#if qMacOS || qWinOS
887
888		Assign_JIS_X208_1990 (*this, s);
889
890		#else
891
892		// Fallback to the ASCII extraction logic.
893
894		Set_SystemEncoding (s);
895
896		#endif
897
898		}
899
900	}
901
902/*****************************************************************************/
903
904#if defined(__clang__) && defined(__has_attribute)
905#if __has_attribute(no_sanitize)
906__attribute__((no_sanitize("unsigned-integer-overflow")))
907#endif
908#endif
909uint32 dng_string::DecodeUTF8 (const char *&s,
910							   uint32 maxBytes,
911							   bool *isValid)
912	{
913
914	static const uint8 gUTF8Bytes [256] =
915		{
916		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
917		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
918		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
919		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
920		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
921		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
922		2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
923		3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6
924		};
925
926	if (isValid)
927		{
928		*isValid = true;
929		}
930
931	const uint8 *nBuf = (const uint8 *) s;
932
933	uint32 aChar = nBuf [0];
934
935	uint32 aSize = gUTF8Bytes [aChar];
936
937	if (aSize > maxBytes)
938		{
939
940		s += maxBytes;
941
942		if (isValid)
943			{
944			*isValid = false;
945			}
946
947		return kREPLACEMENT_CHARACTER;
948
949		}
950
951	s += aSize;
952
953	for (uint32 extra = 1; extra < aSize; extra++)
954		{
955
956		if ((nBuf [extra] & 0xC0) != 0x80)
957			{
958
959			if (isValid)
960				{
961				*isValid = false;
962				}
963
964			return kREPLACEMENT_CHARACTER;
965
966			}
967
968		}
969
970	switch (aSize)
971		{
972
973		case 0:
974			{
975
976			s++;		// Don't get stuck in infinite loop
977
978			if (isValid)
979				{
980				*isValid = false;
981				}
982
983			return kREPLACEMENT_CHARACTER;
984
985			}
986
987		case 1:
988			{
989
990			return aChar;
991
992			}
993
994		case 2:
995			{
996
997			aChar = ((aChar << 6) + nBuf [1]) - (uint32) 0x00003080UL;
998
999			break;
1000
1001			}
1002
1003		case 3:
1004			{
1005
1006			aChar =  ((((aChar << 6) + nBuf [1])
1007							   << 6) + nBuf [2]) - (uint32) 0x000E2080UL;
1008
1009			break;
1010
1011			}
1012
1013		case 4:
1014			{
1015
1016			aChar = ((((((aChar << 6) + nBuf [1])
1017							    << 6) + nBuf [2])
1018								<< 6) + nBuf [3]) - (uint32) 0x03C82080UL;
1019
1020			break;
1021
1022			}
1023
1024		case 5:
1025			{
1026
1027			aChar = ((((((((aChar << 6) + nBuf [1])
1028								  << 6) + nBuf [2])
1029								  << 6) + nBuf [3])
1030								  << 6) + nBuf [4]) - (uint32) 0xFA082080UL;
1031
1032			break;
1033
1034			}
1035
1036		case 6:
1037			{
1038
1039			aChar = ((((((((((aChar << 6) + nBuf [1])
1040								    << 6) + nBuf [2])
1041								    << 6) + nBuf [3])
1042								    << 6) + nBuf [4])
1043								    << 6) + nBuf [5]) - (uint32) 0x82082080UL;
1044
1045			break;
1046
1047			}
1048
1049		}
1050
1051	if (aChar < 0x7F || aChar > 0x0010FFFF)
1052		{
1053
1054		if (isValid)
1055			{
1056			*isValid = false;
1057			}
1058
1059		return kREPLACEMENT_CHARACTER;
1060
1061		}
1062
1063	return aChar;
1064
1065	}
1066
1067/*****************************************************************************/
1068
1069bool dng_string::IsUTF8 (const char *s)
1070	{
1071
1072	uint32 len = strlenAsUint32 (s);
1073
1074	const char *sEnd = s + len;
1075
1076	while (s < sEnd)
1077		{
1078
1079		bool isValid = true;
1080
1081		(void) DecodeUTF8 (s, (uint32) (sEnd - s), &isValid);
1082
1083		if (!isValid)
1084			{
1085			return false;
1086			}
1087
1088		}
1089
1090	return true;
1091
1092	}
1093
1094/*****************************************************************************/
1095
1096void dng_string::Set_UTF8_or_System (const char *s)
1097	{
1098
1099	if (::IsASCII (s))
1100		{
1101
1102		Set (s);
1103
1104		}
1105
1106	else if (IsUTF8 (s))
1107		{
1108
1109		Set_UTF8 (s);
1110
1111		}
1112
1113	else
1114		{
1115
1116		Set_SystemEncoding (s);
1117
1118		}
1119
1120	}
1121
1122/*****************************************************************************/
1123
1124uint32 dng_string::Get_UTF16 (dng_memory_data &buffer) const
1125	{
1126
1127	uint32 count = 0;
1128
1129	const char *sPtr = Get ();
1130
1131	while (*sPtr)
1132		{
1133
1134		uint32 x = DecodeUTF8 (sPtr);
1135
1136		if (x <= 0x0000FFFF ||
1137			x >  0x0010FFFF)
1138			{
1139
1140			count = SafeUint32Add (count, 1);
1141
1142			}
1143
1144		else
1145			{
1146
1147			count = SafeUint32Add (count, 2);
1148
1149			}
1150
1151		}
1152
1153	const uint32 destBufferLength = SafeUint32Add (count, 1);
1154	buffer.Allocate (destBufferLength, sizeof (uint16));
1155
1156	uint16 *dPtr = buffer.Buffer_uint16 ();
1157	uint16 * const destEnd = dPtr + destBufferLength;
1158
1159	sPtr = Get ();
1160
1161	while (*sPtr)
1162		{
1163
1164		uint32 x = DecodeUTF8 (sPtr);
1165
1166		if (x <= 0x0000FFFF)
1167			{
1168
1169			CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1170			*(dPtr++) = (uint16) x;
1171
1172			}
1173
1174		else if (x > 0x0010FFFF)
1175			{
1176
1177			CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1178			*(dPtr++) = (uint16) kREPLACEMENT_CHARACTER;
1179
1180			}
1181
1182		else
1183			{
1184
1185			x -= 0x00010000;
1186
1187			CheckSpaceLeftInBuffer (dPtr, destEnd, 2);
1188			*(dPtr++) = (uint16) ((x >> 10       ) + 0x0000D800);
1189			*(dPtr++) = (uint16) ((x & 0x000003FF) + 0x0000DC00);
1190
1191			}
1192
1193		}
1194
1195	CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1196	*dPtr = 0;
1197
1198	return count;
1199
1200	}
1201
1202/*****************************************************************************/
1203
1204void dng_string::Set_UTF16 (const uint16 *s)
1205	{
1206
1207	if (!s)
1208		{
1209		Clear ();
1210		return;
1211		}
1212
1213	bool swap = false;
1214
1215	if (s [0] == 0xFFFE)		// Swapped byte order marker
1216		{
1217		swap = true;
1218		s++;
1219		}
1220
1221	else if (s [0] == 0xFEFF)	// Non-swapped byte order marker
1222		{
1223		s++;
1224		}
1225
1226	uint32 length16 = 0;
1227
1228	while (s [length16] != 0)
1229		{
1230		length16 = SafeUint32Add (length16, 1);
1231		}
1232
1233	const uint16 *sEnd = s + length16;
1234
1235	const uint32 destBufferSize =
1236		SafeUint32Add (SafeUint32Mult (length16, 6), 1);
1237	dng_memory_data buffer (destBufferSize);
1238
1239	uint8 *d = buffer.Buffer_uint8 ();
1240	uint8 * const destEnd = d + destBufferSize;
1241
1242	while (s < sEnd)
1243		{
1244
1245		uint32 aChar = *s++;
1246
1247		if (swap)
1248			{
1249			aChar = ((aChar << 8) | (aChar >> 8)) & 0x0000FFFF;
1250			}
1251
1252		if ((aChar >= 0x0000D800) && (aChar <= 0x0000DBFF) && (s < sEnd))
1253			{
1254
1255			uint32 aLow = *s;
1256
1257			if (swap)
1258				{
1259				aLow = ((aLow << 8) | (aLow >> 8)) & 0x0000FFFF;
1260				}
1261
1262			if ((aLow >= 0x0000DC00) && (aLow <= 0x0000DFFF))
1263				{
1264
1265				aChar = ((aChar - 0x0000D800) << 10) +
1266					    (aLow - 0x0000DC00) +
1267					    0x00010000;
1268
1269				s++;
1270
1271				}
1272
1273			}
1274
1275		if (aChar > 0x7FFFFFFF)
1276			{
1277			aChar = kREPLACEMENT_CHARACTER;
1278			}
1279
1280		if (aChar < 0x00000080)
1281			{
1282			CheckSpaceLeftInBuffer (d, destEnd, 1);
1283			*(d++) = (uint8) aChar;
1284			}
1285
1286		else if (aChar < 0x00000800)
1287			{
1288			CheckSpaceLeftInBuffer (d, destEnd, 2);
1289			*(d++) = (uint8) ((aChar >> 6) | 0x000000C0);
1290			*(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080);
1291			}
1292
1293		else if (aChar < 0x00010000)
1294			{
1295			CheckSpaceLeftInBuffer (d, destEnd, 3);
1296			*(d++) = (uint8) ( (aChar >> 12) | 0x000000E0);
1297			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1298			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1299			}
1300
1301		else if (aChar < 0x00200000)
1302			{
1303			CheckSpaceLeftInBuffer (d, destEnd, 4);
1304			*(d++) = (uint8) ( (aChar >> 18) | 0x000000F0);
1305			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1306			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1307			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1308			}
1309
1310		else if (aChar < 0x04000000)
1311			{
1312			CheckSpaceLeftInBuffer (d, destEnd, 5);
1313			*(d++) = (uint8) ( (aChar >> 24) | 0x000000F8);
1314			*(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
1315			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1316			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1317			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1318			}
1319
1320		else
1321			{
1322			CheckSpaceLeftInBuffer (d, destEnd, 6);
1323			*(d++) = (uint8) ( (aChar >> 30) | 0x000000FC);
1324			*(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080);
1325			*(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
1326			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1327			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1328			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1329			}
1330
1331		}
1332
1333	CheckSpaceLeftInBuffer (d, destEnd, 1);
1334	*d = 0;
1335
1336	Set (buffer.Buffer_char ());
1337
1338	}
1339
1340/*****************************************************************************/
1341
1342void dng_string::Clear ()
1343	{
1344
1345	Set (NULL);
1346
1347	}
1348
1349/*****************************************************************************/
1350
1351void dng_string::Truncate (uint32 maxBytes)
1352	{
1353
1354	uint32 len = Length ();
1355
1356	if (len > maxBytes)
1357		{
1358
1359		uint8 *s = fData.Buffer_uint8 ();
1360
1361		// Don't truncate on an extension character.  Extensions characters
1362		// in UTF-8 have the 0x80 bit set and the 0x40 bit clear.
1363
1364		while (maxBytes > 0 && ((s [maxBytes]) & 0xC0) == 0x80)
1365			{
1366
1367			maxBytes--;
1368
1369			}
1370
1371		s [maxBytes] = 0;
1372
1373		}
1374
1375	}
1376
1377/*****************************************************************************/
1378
1379bool dng_string::TrimTrailingBlanks ()
1380	{
1381
1382	bool didTrim = false;
1383
1384	if (fData.Buffer ())
1385		{
1386
1387		char *s = fData.Buffer_char ();
1388
1389		uint32 len = strlenAsUint32 (s);
1390
1391		while (len > 0 && s [len - 1] == ' ')
1392			{
1393			len--;
1394			didTrim = true;
1395			}
1396
1397		s [len] = 0;
1398
1399		}
1400
1401	return didTrim;
1402
1403	}
1404
1405/*****************************************************************************/
1406
1407bool dng_string::TrimLeadingBlanks ()
1408	{
1409
1410	bool didTrim = false;
1411
1412	const char *s = Get ();
1413
1414	while (*s == ' ')
1415		{
1416		s++;
1417		didTrim = true;
1418		}
1419
1420	if (didTrim)
1421		{
1422		Set (s);
1423		}
1424
1425	return didTrim;
1426
1427	}
1428
1429/*****************************************************************************/
1430
1431bool dng_string::IsEmpty () const
1432	{
1433
1434	const char *s = Get ();
1435
1436	return *s == 0;
1437
1438	}
1439
1440/*****************************************************************************/
1441
1442uint32 dng_string::Length () const
1443	{
1444
1445	const char *s = Get ();
1446
1447	return strlenAsUint32 (s);
1448
1449	}
1450
1451/*****************************************************************************/
1452
1453bool dng_string::operator== (const dng_string &s) const
1454	{
1455
1456	const char *s1 =   Get ();
1457	const char *s2 = s.Get ();
1458
1459	return strcmp (s1, s2) == 0;
1460
1461	}
1462
1463/*****************************************************************************/
1464
1465bool dng_string::Matches (const char *t,
1466						  const char *s,
1467						  bool case_sensitive)
1468	{
1469
1470	while (*s != 0)
1471		{
1472
1473		char c1 = *(s++);
1474		char c2 = *(t++);
1475
1476		if (!case_sensitive)
1477			{
1478			c1 = ForceUppercase (c1);
1479			c2 = ForceUppercase (c2);
1480			}
1481
1482		if (c1 != c2)
1483			{
1484			return false;
1485			}
1486
1487		}
1488
1489	return (*t == 0);
1490
1491	}
1492
1493/*****************************************************************************/
1494
1495bool dng_string::Matches (const char *s,
1496						  bool case_sensitive) const
1497	{
1498
1499	return dng_string::Matches (Get (), s, case_sensitive);
1500
1501	}
1502
1503/*****************************************************************************/
1504
1505bool dng_string::StartsWith (const char *s,
1506						     bool case_sensitive) const
1507	{
1508
1509	const char *t = Get ();
1510
1511	while (*s != 0)
1512		{
1513
1514		char c1 = *(s++);
1515		char c2 = *(t++);
1516
1517		if (!case_sensitive)
1518			{
1519			c1 = ForceUppercase (c1);
1520			c2 = ForceUppercase (c2);
1521			}
1522
1523		if (c1 != c2)
1524			{
1525			return false;
1526			}
1527
1528		}
1529
1530	return true;
1531
1532	}
1533
1534/*****************************************************************************/
1535
1536bool dng_string::EndsWith (const char *s,
1537						   bool case_sensitive) const
1538	{
1539
1540	uint32 len1 = Length ();
1541
1542	uint32 len2 = strlenAsUint32 (s);
1543
1544	if (len1 < len2)
1545		{
1546		return false;
1547		}
1548
1549	const char *t = Get () + (len1 - len2);
1550
1551	while (*s != 0)
1552		{
1553
1554		char c1 = *(s++);
1555		char c2 = *(t++);
1556
1557		if (!case_sensitive)
1558			{
1559			c1 = ForceUppercase (c1);
1560			c2 = ForceUppercase (c2);
1561			}
1562
1563		if (c1 != c2)
1564			{
1565			return false;
1566			}
1567
1568		}
1569
1570	return true;
1571
1572	}
1573
1574/*****************************************************************************/
1575
1576bool dng_string::Contains (const char *s,
1577						   bool case_sensitive,
1578						   int32 *match_offset) const
1579	{
1580
1581	if (match_offset)
1582		{
1583		*match_offset = -1;
1584		}
1585
1586	uint32 len1 = Length ();
1587
1588	uint32 len2 = strlenAsUint32 (s);
1589
1590	if (len1 < len2)
1591		{
1592		return false;
1593		}
1594
1595	uint32 offsets = len1 - len2;
1596
1597	for (uint32 offset = 0; offset <= offsets; offset++)
1598		{
1599
1600		const char *ss = s;
1601		const char *tt = Get () + offset;
1602
1603		while (*ss != 0)
1604			{
1605
1606			char c1 = *(ss++);
1607			char c2 = *(tt++);
1608
1609			if (!case_sensitive)
1610				{
1611				c1 = ForceUppercase (c1);
1612				c2 = ForceUppercase (c2);
1613				}
1614
1615			if (c1 != c2)
1616				{
1617				goto tryNextOffset;
1618				}
1619
1620			}
1621
1622		if (match_offset)
1623			{
1624			*match_offset = offset;
1625			}
1626
1627		return true;
1628
1629		tryNextOffset:	;
1630
1631		}
1632
1633	return false;
1634
1635	}
1636
1637/*****************************************************************************/
1638
1639bool dng_string::Replace (const char *old_string,
1640						  const char *new_string,
1641						  bool case_sensitive)
1642	{
1643
1644	int32 match_offset = -1;
1645
1646	if (Contains (old_string,
1647				  case_sensitive,
1648				  &match_offset))
1649		{
1650
1651		uint32 len1 = Length ();
1652
1653		uint32 len2 = strlenAsUint32 (old_string);
1654		uint32 len3 = strlenAsUint32 (new_string);
1655
1656		if (len2 == len3)
1657			{
1658
1659			strncpy (fData.Buffer_char () + match_offset,
1660					 new_string,
1661					 len3);
1662
1663			}
1664
1665		else if (len2 > len3)
1666			{
1667
1668			strncpy (fData.Buffer_char () + match_offset,
1669					 new_string,
1670					 len3);
1671
1672			const char *s = fData.Buffer_char () + match_offset + len2;
1673				  char *d = fData.Buffer_char () + match_offset + len3;
1674
1675			uint32 extra = len1 - match_offset - len2 + 1;	// + 1 for NULL termination
1676
1677			for (uint32 j = 0; j < extra; j++)
1678				{
1679				*(d++) = *(s++);
1680				}
1681
1682			}
1683
1684		else
1685			{
1686
1687			// "len1 - len2" cannot wrap around because we know that if this
1688			// string contains old_string, len1 >= len2 must hold.
1689			dng_memory_data tempBuffer (
1690				SafeUint32Add (SafeUint32Add (len1 - len2, len3), 1));
1691
1692			if (match_offset)
1693				{
1694
1695				strncpy (tempBuffer.Buffer_char (),
1696						 fData     .Buffer_char (),
1697						 match_offset);
1698
1699				}
1700
1701			if (len3)
1702				{
1703
1704				strncpy (tempBuffer.Buffer_char () + match_offset,
1705						 new_string,
1706						 len3);
1707
1708				}
1709
1710			uint32 extra = len1 - match_offset - len2 + 1;	// + 1 for NULL termination
1711
1712			strncpy (tempBuffer.Buffer_char () + match_offset + len3,
1713					 fData     .Buffer_char () + match_offset + len2,
1714					 extra);
1715
1716			Set (tempBuffer.Buffer_char ());
1717
1718			}
1719
1720		return true;
1721
1722		}
1723
1724	return false;
1725
1726	}
1727
1728/*****************************************************************************/
1729
1730bool dng_string::TrimLeading (const char *s,
1731						      bool case_sensitive)
1732	{
1733
1734	if (StartsWith (s, case_sensitive))
1735		{
1736
1737		Set (Get () + strlenAsUint32 (s));
1738
1739		return true;
1740
1741		}
1742
1743	return false;
1744
1745	}
1746
1747/*****************************************************************************/
1748
1749void dng_string::Append (const char *s)
1750	{
1751
1752	uint32 len2 = strlenAsUint32 (s);
1753
1754	if (len2)
1755		{
1756
1757		uint32 len1 = Length ();
1758
1759		dng_memory_data temp (SafeUint32Add (SafeUint32Add (len1, len2), 1));
1760
1761		char *buffer = temp.Buffer_char ();
1762
1763		if (len1)
1764			{
1765			memcpy (buffer, Get (), len1);
1766			}
1767
1768		memcpy (buffer + len1, s, len2 + 1);
1769
1770		Set (buffer);
1771
1772		}
1773
1774	}
1775
1776/*****************************************************************************/
1777
1778void dng_string::SetUppercase ()
1779	{
1780
1781	if (fData.Buffer ())
1782		{
1783
1784		uint32 len = Length ();
1785
1786		char *dPtr = fData.Buffer_char ();
1787
1788		for (uint32 j = 0; j < len; j++)
1789			{
1790
1791			char c = dPtr [j];
1792
1793			if (c >= 'a' && c <= 'z')
1794				{
1795
1796				dPtr [j] = c - 'a' + 'A';
1797
1798				}
1799
1800			}
1801
1802		}
1803
1804	}
1805
1806/*****************************************************************************/
1807
1808void dng_string::SetLowercase ()
1809	{
1810
1811	if (fData.Buffer ())
1812		{
1813
1814		uint32 len = Length ();
1815
1816		char *dPtr = fData.Buffer_char ();
1817
1818		for (uint32 j = 0; j < len; j++)
1819			{
1820
1821			char c = dPtr [j];
1822
1823			if (c >= 'A' && c <= 'Z')
1824				{
1825
1826				dPtr [j] = c - 'A' + 'a';
1827
1828				}
1829
1830			}
1831
1832		}
1833
1834	}
1835
1836/*****************************************************************************/
1837
1838void dng_string::SetLineEndings (char ending)
1839	{
1840
1841	if (fData.Buffer ())
1842		{
1843
1844		const char *sPtr = fData.Buffer_char ();
1845		      char *dPtr = fData.Buffer_char ();
1846
1847		while (*sPtr)
1848			{
1849
1850			char c = *(sPtr++);
1851
1852			char nc = sPtr [0];
1853
1854			if ((c == '\r' && nc == '\n') ||
1855				(c == '\n' && nc == '\r'))
1856				{
1857
1858				sPtr++;
1859
1860				if (ending)
1861					{
1862					*(dPtr++) = ending;
1863					}
1864
1865				}
1866
1867			else if (c == '\n' ||
1868					 c == '\r')
1869				{
1870
1871				if (ending)
1872					{
1873					*(dPtr++) = ending;
1874					}
1875
1876				}
1877
1878			else
1879				{
1880
1881				*(dPtr++) = c;
1882
1883				}
1884
1885			}
1886
1887		*dPtr = 0;
1888
1889		}
1890
1891	}
1892
1893/*****************************************************************************/
1894
1895void dng_string::StripLowASCII ()
1896	{
1897
1898	if (fData.Buffer ())
1899		{
1900
1901		const char *sPtr = fData.Buffer_char ();
1902		      char *dPtr = fData.Buffer_char ();
1903
1904		while (*sPtr)
1905			{
1906
1907			char c = *(sPtr++);
1908
1909			if (c == '\r' || c == '\n' || (uint8) c >= ' ')
1910				{
1911
1912				*(dPtr++) = c;
1913
1914				}
1915
1916			}
1917
1918		*dPtr = 0;
1919
1920		}
1921
1922	}
1923
1924/*****************************************************************************/
1925
1926void dng_string::NormalizeAsCommaSeparatedNumbers ()
1927	{
1928
1929	if (fData.Buffer ())
1930		{
1931
1932		const char *sPtr = fData.Buffer_char ();
1933			  char *dPtr = fData.Buffer_char ();
1934
1935		bool commaInserted = false;
1936
1937		while (*sPtr)
1938			{
1939
1940			uint32 c = DecodeUTF8 (sPtr);
1941
1942			// Support number formats such as "3", "+3.0", "-3.1416", "314.16e-2",
1943			// "0.31416E1", but no hex/octal number representations.
1944
1945			if (isdigit ((int) c) || c == '.' || c == '-' || c == '+' || c == 'e' || c == 'E')
1946				{
1947
1948				*(dPtr++) = (char) c;
1949
1950				if (commaInserted)
1951					{
1952
1953					commaInserted = false;
1954
1955					}
1956
1957				}
1958
1959			else if (!commaInserted)
1960				{
1961
1962				*(dPtr++) = ',';
1963
1964				commaInserted = true;
1965
1966				}
1967
1968			}
1969
1970		*dPtr = 0;
1971
1972		}
1973
1974	}
1975
1976/******************************************************************************/
1977
1978// Unicode to low-ASCII strings table.
1979
// One row of the kUnicodeToLowASCII replacement table.

struct UnicodeToLowASCIIEntry
	{
	uint32 unicode;			// Code point to be replaced.
	const char *ascii;		// Zero-terminated text written in its place.
	};
1985
// Replacement table used by dng_string::ForceASCII.  ForceASCII scans it
// front to back and uses the first entry whose code point matches; code
// points above 0x7F that have no entry here are written as '?'.

static const UnicodeToLowASCIIEntry kUnicodeToLowASCII [] =
	{
	{	0x00A0, " "		},
	{	0x00A1, "!"		},
	{	0x00A9, "(C)"	},
	{	0x00AA, "a"		},
	{	0x00AB, "<<"	},
	{	0x00AC, "!"		},
	{	0x00AE, "(R)"	},
	{	0x00B0, "dg"	},
	{	0x00B1, "+-"	},
	{	0x00B7, "."		},
	{	0x00BA, "o"		},
	{	0x00BB, ">>"	},
	{	0x00BF, "?"		},
	{	0x00C0, "A"		},
	{	0x00C1, "A"		},
	{	0x00C2, "A"		},
	{	0x00C3, "A"		},
	{	0x00C4, "A"		},
	{	0x00C5, "A"		},
	{	0x00C6, "AE"	},
	{	0x00C7, "C"		},
	{	0x00C8, "E"		},
	{	0x00C9, "E"		},
	{	0x00CA, "E"		},
	{	0x00CB, "E"		},
	{	0x00CC, "I"		},
	{	0x00CD, "I"		},
	{	0x00CE, "I"		},
	{	0x00CF, "I"		},
	{	0x00D1, "N"		},
	{	0x00D2, "O"		},
	{	0x00D3, "O"		},
	{	0x00D4, "O"		},
	{	0x00D5, "O"		},
	{	0x00D6, "O"		},
	{	0x00D8, "O"		},
	{	0x00D9, "U"		},
	{	0x00DA, "U"		},
	{	0x00DB, "U"		},
	{	0x00DC, "U"		},
	{	0x00DD, "Y"		},
	{	0x00E0, "a"		},
	{	0x00E1, "a"		},
	{	0x00E2, "a"		},
	{	0x00E3, "a"		},
	{	0x00E4, "a"		},
	{	0x00E5, "a"		},
	{	0x00E6, "ae"	},
	{	0x00E7, "c"		},
	{	0x00E8, "e"		},
	{	0x00E9, "e"		},
	{	0x00EA, "e"		},
	{	0x00EB, "e"		},
	{	0x00EC, "i"		},
	{	0x00ED, "i"		},
	{	0x00EE, "i"		},
	{	0x00EF, "i"		},
	{	0x00F1, "n"		},
	{	0x00F2, "o"		},
	{	0x00F3, "o"		},
	{	0x00F4, "o"		},
	{	0x00F5, "o"		},
	{	0x00F6, "o"		},
	{	0x00F7, "/"		},
	{	0x00F8, "o"		},
	{	0x00F9, "u"		},
	{	0x00FA, "u"		},
	{	0x00FB, "u"		},
	{	0x00FC, "u"		},
	{	0x00FD, "y"		},
	{	0x00FF, "y"		},
	{	0x0131, "i"		},
	{	0x0152, "OE"	},
	{	0x0153, "oe"	},
	{	0x0178, "Y"		},
	{	0x2013, "-"		},
	{	0x2014, "-"		},
	{	0x2018, "'"		},
	{	0x2019, "'"		},
	{	0x201A, ","		},
	{	0x201C, "\""	},
	{	0x201D, "\""	},
	{	0x201E, ",,"	},
	{	0x2022, "."		},
	{	0x2026, "..."	},
	{	0x2039, "<"		},
	{	0x203A, ">"		},
	{	0x2044, "/"		},
	{	0x2122, "TM"	},
	{	0x2206, "d"		},
	{	0x2211, "S"		},
	{	0x2260, "!="	},
	{	0x2264, "<="	},
	{	0x2265, ">="	},
	{	0x2318, "#"		},
	{	0xFB01, "fi"	},
	{	0xFB02, "fl"	}
	};
2086
2087/******************************************************************************/
2088
2089void dng_string::ForceASCII ()
2090	{
2091
2092	if (!IsASCII ())
2093		{
2094
2095		uint32 tempBufferSize =
2096			SafeUint32Add (SafeUint32Mult(Length(), 3), 1);
2097		dng_memory_data tempBuffer (tempBufferSize);
2098
2099		char *dPtr = tempBuffer.Buffer_char ();
2100		char * const destEnd = dPtr + tempBufferSize;
2101
2102		const char *sPtr = Get ();
2103
2104		while (*sPtr)
2105			{
2106
2107			uint32 x = DecodeUTF8 (sPtr);
2108
2109			if (x <= 0x007F)
2110				{
2111
2112				CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2113				*(dPtr++) = (char) x;
2114
2115				}
2116
2117			else
2118				{
2119
2120				const char *ascii = NULL;
2121
2122				const uint32 kTableEntrys = sizeof (kUnicodeToLowASCII    ) /
2123									        sizeof (kUnicodeToLowASCII [0]);
2124
2125				for (uint32 entry = 0; entry < kTableEntrys; entry++)
2126					{
2127
2128					if (kUnicodeToLowASCII [entry] . unicode == x)
2129						{
2130
2131						ascii = kUnicodeToLowASCII [entry] . ascii;
2132
2133						break;
2134
2135						}
2136
2137					}
2138
2139				if (ascii)
2140					{
2141
2142					while (*ascii)
2143						{
2144
2145						CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2146						*(dPtr++) = *(ascii++);
2147
2148						}
2149
2150					}
2151
2152				else
2153					{
2154
2155					CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2156					*(dPtr++) ='?';
2157
2158					}
2159
2160				}
2161
2162			}
2163
2164		CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2165		*dPtr = 0;
2166
2167		Set (tempBuffer.Buffer_char ());
2168
2169		}
2170
2171	}
2172
2173/******************************************************************************/
2174
// Held by dng_string::Compare around its call to UCCompareTextDefault on
// Mac OS, which that code notes is not thread safe on some OS versions.

static dng_mutex gProtectUCCalls ("gProtectUCCalls");
2176
2177/******************************************************************************/
2178
2179int32 dng_string::Compare (const dng_string &s) const
2180	{
2181
2182	#if qMacOS
2183	#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
2184
2185		ThrowProgramError ("Compare() not implemented on iOS");
2186		return 0;
2187
2188	#else
2189
2190		{
2191
2192		dng_memory_data aStrA;
2193		dng_memory_data aStrB;
2194
2195		uint32 aLenA = this->Get_UTF16 (aStrA);
2196		uint32 aLenB = s    .Get_UTF16 (aStrB);
2197
2198		if (aLenA > 0)
2199			{
2200
2201			if (aLenB > 0)
2202				{
2203
2204				// For some Mac OS versions anyway, UCCompareTextDefault is not
2205				// thread safe.
2206
2207				dng_lock_mutex lockMutex (&gProtectUCCalls);
2208
2209				UCCollateOptions aOptions = kUCCollateStandardOptions |
2210											kUCCollatePunctuationSignificantMask;
2211
2212				SInt32 aOrder = -1;
2213
2214				Boolean aEqual = false;
2215
2216				OSStatus searchStatus = ::UCCompareTextDefault (aOptions,
2217																aStrA.Buffer_uint16 (),
2218																aLenA,
2219																aStrB.Buffer_uint16 (),
2220																aLenB,
2221																&aEqual,
2222																&aOrder);
2223
2224				if (searchStatus == noErr)
2225					{
2226
2227					if (aEqual || (aOrder == 0))
2228						{
2229						return 0;
2230						}
2231
2232					else
2233						{
2234						return (aOrder > 0) ? 1 : -1;
2235						}
2236
2237					}
2238
2239				else
2240					{
2241
2242					DNG_REPORT ("UCCompareTextDefault failed");
2243
2244					return -1;
2245
2246					}
2247
2248				}
2249
2250			else
2251				{
2252				return 1;
2253				}
2254
2255			}
2256
2257		else
2258			{
2259
2260			if (aLenB > 0)
2261				{
2262				return -1;
2263				}
2264
2265			else
2266				{
2267				return 0;
2268				}
2269
2270			}
2271
2272		}
2273
2274	#endif  // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
2275	#elif qWinOS
2276
2277		{
2278
2279		dng_memory_data aStrA;
2280		dng_memory_data aStrB;
2281
2282		uint32 aLenA = this->Get_UTF16 (aStrA);
2283		uint32 aLenB = s    .Get_UTF16 (aStrB);
2284
2285		if (aLenA > 0)
2286			{
2287
2288			if (aLenB > 0)
2289				{
2290
2291				LCID locale = LOCALE_SYSTEM_DEFAULT;
2292
2293				DWORD aFlags = NORM_IGNOREWIDTH;
2294
2295				int aOrder = ::CompareStringW (locale,
2296											   aFlags,
2297											   (const WCHAR *) aStrA.Buffer_uint16 (),
2298											   aLenA,
2299											   (const WCHAR *) aStrB.Buffer_uint16 (),
2300											   aLenB);
2301
2302				if (aOrder == CSTR_EQUAL)
2303					{
2304					return 0;
2305					}
2306
2307				else if (aOrder == CSTR_GREATER_THAN)
2308					{
2309					return 1;
2310					}
2311
2312				else
2313					{
2314					return -1;
2315					}
2316
2317				}
2318
2319			else
2320				{
2321				return 1;
2322				}
2323
2324			}
2325
2326		else
2327			{
2328
2329			if (aLenB > 0)
2330				{
2331				return -1;
2332				}
2333			else
2334				{
2335				return 0;
2336				}
2337
2338			}
2339
2340		}
2341
2342	#else
2343
2344	// Fallback to a pure Unicode sort order.
2345
2346		{
2347
2348		for (uint32 pass = 0; pass < 2; pass++)
2349			{
2350
2351			const char *aPtr =   Get ();
2352			const char *bPtr = s.Get ();
2353
2354			while (*aPtr || *bPtr)
2355				{
2356
2357				if (!bPtr)
2358					{
2359					return 1;
2360					}
2361
2362				else if (!aPtr)
2363					{
2364					return -1;
2365					}
2366
2367				uint32 a = DecodeUTF8 (aPtr);
2368				uint32 b = DecodeUTF8 (bPtr);
2369
2370				// Ignore case on first compare pass.
2371
2372				if (pass == 0)
2373					{
2374
2375					if (a >= (uint32) 'a' && a <= (uint32) 'z')
2376						{
2377						a = a - (uint32) 'a' + (uint32) 'A';
2378						}
2379
2380					if (b >= (uint32) 'a' && b <= (uint32) 'z')
2381						{
2382						b = b - (uint32) 'a' + (uint32) 'A';
2383						}
2384
2385					}
2386
2387				if (b > a)
2388					{
2389					return 1;
2390					}
2391
2392				else if (a < b)
2393					{
2394					return -1;
2395					}
2396
2397				}
2398
2399			}
2400
2401		}
2402
2403	#endif
2404
2405	return 0;
2406
2407	}
2408
2409/*****************************************************************************/
2410