1/*****************************************************************************/
2// Copyright 2006-2007 Adobe Systems Incorporated
3// All Rights Reserved.
4//
5// NOTICE:  Adobe permits you to use, modify, and distribute this file in
6// accordance with the terms of the Adobe license agreement accompanying it.
7/*****************************************************************************/
8
9/* $Id: //mondo/dng_sdk_1_4/dng_sdk/source/dng_string.cpp#2 $ */
10/* $DateTime: 2012/07/31 22:04:34 $ */
11/* $Change: 840853 $ */
12/* $Author: tknoll $ */
13
14/*****************************************************************************/
15
16#include "dng_string.h"
17
18#include "dng_assertions.h"
19#include "dng_exceptions.h"
20#include "dng_flags.h"
21#include "dng_mutex.h"
22#include "dng_utils.h"
23#include "dng_safe_arithmetic.h"
24
25#if qMacOS
26#include <TargetConditionals.h>
27#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
28#include <MobileCoreServices/MobileCoreServices.h>
29#else
30#include <CoreServices/CoreServices.h>
31#endif  // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
32#endif  // qMacOS
33
34#if qWinOS
35#include <windows.h>
36#endif
37
38#if qiPhone || qAndroid || qLinux
39#include <ctype.h> // for isdigit
40#endif
41
42/*****************************************************************************/
43
44const uint32 kREPLACEMENT_CHARACTER	= 0x0000FFFD;
45
46/*****************************************************************************/
47
48// Returns the length of the zero-terminated string 's'. Throws a dng_exception
49// if the length of 's' is too large to be represented as a uint32_t.
50static uint32 strlenAsUint32(const char *s)
51	{
52
53	uint32 lengthAsUint32 = 0;
54	ConvertUnsigned(strlen(s), &lengthAsUint32);
55
56	return lengthAsUint32;
57
58	}
59
60// Checks whether there is enough space left in the buffer pointed to by
61// 'currentPos' to write at least 'space' elements of type T (to positions
62// currentPos[0] through currentPos[space - 1]. Throws a dng_exception if there
63// is not enough space left in the buffer.
64// 'bufferEnd' should point one element beyond the end of the buffer. For
65// example, if the buffer is "T buffer[3];", then bufferEnd should point to
66// T + 3.
67template <class T>
68static void CheckSpaceLeftInBuffer(const T *currentPos,
69								   const T *bufferEnd,
70								   size_t space)
71	{
72
73	if (bufferEnd < currentPos || static_cast<size_t>(bufferEnd - currentPos) < space)
74		{
75		ThrowMemoryFull ("Buffer overrun");
76		}
77
78	}
79
80/*****************************************************************************/
81
82// Throws an exception to notify the user of code that has not been security
83// hardened and prevent execution of that code.
84//
85// Though the DNG SDK in general has been security-hardened, this does not apply
86// to the following Mac-OS- and Windows-specific functions. Calls to
87// ThrowNotHardened() have been added to these functions to alert callers of
88// this fact.
89//
90// If you're trying to use a function that calls ThrowNotHardened(), you need to
91// fix the security issues noted in the comment next to the ThrowNotHardened()
92// call. Once you have fixed these issues, obtain a security review for the
93// fixes. This may require fuzzing of the modified code on the target platform.
94static void ThrowNotHardened()
95	{
96	ThrowProgramError ("This function has not been security-hardened");
97	}
98
99#if qMacOS
100#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
101
102static uint32 Extract_SystemEncoding (const dng_string &dngString,
103							   		  dng_memory_data &buffer)
104	{
105		// TODO: Needs implementation.
106		ThrowProgramError ("Extract_SystemEncoding() not implemented on iOS");
107		return 0;
108	}
109
110static void Assign_SystemEncoding (dng_string &dngString,
111							       const char *otherString)
112	{
113		// TODO: Needs implementation.
114		ThrowProgramError ("Assign_SystemEncoding() not implemented on iOS");
115
116	}
117
118static void Assign_JIS_X208_1990 (dng_string &dngString,
119							      const char *otherString)
120	{
121		// TODO: Needs implementation.
122		ThrowProgramError ("Assign_JIS_X208_1990() not implemented on iOS");
123	}
124
125#else
126
127static void Assign_Multibyte (dng_string &dngString,
128							  const char *otherString,
129							  TextEncoding encoding)
130	{
131
132	// This function contains security-vulnerable code. Do not use.
133	// The particular vulnerabilities are:
134	// - Casting the result of strlen() to a uint32 may case truncation. (Use
135	//   strlenAsUint32() instead.)
136	// - The computation of aBufSize and the subsequent addition of 1 in the
137	//   call to the dng_memory_data constructor may wrap around.
138	ThrowNotHardened();
139
140	uint32 aSize = (uint32) strlen (otherString);
141
142	if (aSize > 0)
143		{
144
145		uint32 aBufSize = aSize * 6 + 256;
146
147		dng_memory_data aBuf (aBufSize + 1);
148
149		UnicodeMapping aMapping;
150
151		aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0,
152														 kUnicodeNoSubset,
153														 kUnicodeUTF8Format);
154
155		aMapping.otherEncoding   = encoding;
156		aMapping.mappingVersion  = kUnicodeUseLatestMapping;
157
158		TextToUnicodeInfo aInfo = NULL;
159
160		if (::CreateTextToUnicodeInfo (&aMapping, &aInfo) == noErr)
161			{
162
163			ByteCount aInput  = 0;
164			ByteCount aOutput = 0;
165
166			::ConvertFromTextToUnicode (aInfo,
167										aSize,
168									    otherString,
169									    kUnicodeUseFallbacksMask |
170									    kUnicodeLooseMappingsMask,
171									    0,
172									    NULL,
173									    NULL,
174									    NULL,
175									    aBufSize,
176									    &aInput,
177									    &aOutput,
178									    (UniChar *) aBuf.Buffer ());
179
180			::DisposeTextToUnicodeInfo (&aInfo);
181
182			if (aOutput > 0 && aOutput <= aBufSize)
183				{
184
185				char *aBufChar = aBuf.Buffer_char ();
186
187				aBufChar [aOutput] = 0;
188
189				dngString.Set (aBufChar);
190
191				return;
192
193				}
194
195			}
196
197		}
198
199	dngString.Clear ();
200
201	}
202
203static uint32 Extract_Multibyte (const dng_string &dngString,
204							     dng_memory_data &buffer,
205							     TextEncoding encoding)
206	{
207
208	// This function contains security-vulnerable code. Do not use.
209	// The particular vulnerabilities are:
210	// - The computation of aBufSize may wrap around.
211	// - The computation of the argument to buffer.Allocate() may overflow; the
212	//   conversion to uint32 is also problematic.
213	// - The signed-to-unsigned conversion in the return statement "
214	//   return (uint32) aOutput;" may be problematic.
215	ThrowNotHardened();
216
217	uint32 aSize = dngString.Length ();
218
219	if (aSize > 0)
220		{
221
222		uint32 aBufSize = (aSize * 2) + 256;
223
224		dng_memory_data tempBuffer (aBufSize);
225
226		UnicodeMapping aMapping;
227
228		aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0,
229														 kUnicodeNoSubset,
230														 kUnicodeUTF8Format);
231
232		aMapping.otherEncoding   = encoding;
233		aMapping.mappingVersion  = kUnicodeUseLatestMapping;
234
235		UnicodeToTextInfo aInfo = NULL;
236
237		if (::CreateUnicodeToTextInfo (&aMapping, &aInfo) == noErr)
238			{
239
240			ByteCount aInput  = 0;
241			ByteCount aOutput = 0;
242
243			::ConvertFromUnicodeToText (aInfo,
244										aSize,
245										(const UniChar *) dngString.Get (),
246									    kUnicodeUseFallbacksMask  |
247									    kUnicodeLooseMappingsMask |
248									    kUnicodeDefaultDirectionMask,
249									    0,
250									    NULL,
251									    NULL,
252									    NULL,
253									    aBufSize,
254									    &aInput,
255									    &aOutput,
256									    tempBuffer.Buffer_char ());
257
258			::DisposeUnicodeToTextInfo (&aInfo);
259
260			if (aOutput > 0)
261				{
262
263				buffer.Allocate ((uint32) (aOutput + 1));
264
265				memcpy (buffer.Buffer (),
266						tempBuffer.Buffer (),
267						aOutput);
268
269				buffer.Buffer_char () [aOutput] = 0;
270
271				return (uint32) aOutput;
272
273				}
274
275			}
276
277		}
278
279	buffer.Allocate (1);
280
281	buffer.Buffer_char () [0] = 0;
282
283	return 0;
284
285	}
286
287static void Assign_SystemEncoding (dng_string &dngString,
288							       const char *otherString)
289	{
290
291	TextEncoding aEncoding;
292
293	::UpgradeScriptInfoToTextEncoding (smSystemScript,
294									   kTextLanguageDontCare,
295									   kTextRegionDontCare,
296									   NULL,
297									   &aEncoding);
298
299	Assign_Multibyte (dngString,
300					  otherString,
301					  aEncoding);
302
303	}
304
305static uint32 Extract_SystemEncoding (const dng_string &dngString,
306							   		  dng_memory_data &buffer)
307	{
308
309	TextEncoding aEncoding;
310
311	::UpgradeScriptInfoToTextEncoding (smSystemScript,
312									   kTextLanguageDontCare,
313									   kTextRegionDontCare,
314									   NULL,
315									   &aEncoding);
316
317	return Extract_Multibyte (dngString,
318					   		  buffer,
319					   		  aEncoding);
320
321	}
322
323static void Assign_JIS_X208_1990 (dng_string &dngString,
324							      const char *otherString)
325	{
326
327	Assign_Multibyte (dngString,
328					  otherString,
329					  kTextEncodingJIS_X0208_90);
330
331	}
332
333#endif  // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
334#endif  // qMacOS
335
336/*****************************************************************************/
337
338#if qWinOS
339
340static void Assign_Multibyte (dng_string &dngString,
341							  const char *otherString,
342							  UINT encoding)
343	{
344
345	// This function contains security-vulnerable code. Do not use.
346	// The particular vulnerabilities are:
347	// - Converting the return value of strlen() to int may cause overflow.
348	// - The computation of aBufChars and of the argument to the dng_memory_data
349	//   constructor may overflow. Additionally, there is an implicit
350	//   signed-to-unsigned conversion in the call to the dng_memory_data
351	//   constructor.
352	ThrowNotHardened();
353
354	DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes");
355
356	int aSize = (int) strlen (otherString);
357
358	if (aSize > 0)
359		{
360
361		int aBufChars = aSize * 3 + 128;
362
363		dng_memory_data aBuf ((aBufChars + 1) << 1);
364
365		int aResult = ::MultiByteToWideChar (encoding,
366											 0,
367											 otherString,
368											 aSize,
369											 (WCHAR *) aBuf.Buffer (),
370											 aBufChars);
371
372		if (aResult > 0 && aResult <= aBufChars)
373			{
374
375			uint16 * aUTF16 = aBuf.Buffer_uint16 ();
376
377			aUTF16 [aResult] = 0;
378
379			dngString.Set_UTF16 (aUTF16);
380
381			return;
382
383			}
384
385		}
386
387	dngString.Clear ();
388
389	}
390
391static uint32 Extract_Multibyte (const dng_string &dngString,
392							     dng_memory_data &buffer,
393							     UINT encoding)
394	{
395
396	// This function contains security-vulnerable code. Do not use.
397	// The particular vulnerabilities are:
398	// - Converting the return value of dngString.Get_UTF16() may cause
399	//   overflow.
400	// - The computation of dBufSize may overflow.
401	// - The calls to the dng_memory_data constructor and to buffer.Allocate()
402	//   trigger implicit conversions of int to uint32 that may be problematic.
403	// - The memcpy() call triggers an implicit conversion of aResult to a
404	//   size_t, which may be problematic.
405	// - The conversion of aResult to a uint32 in the return statement may be
406	//   problematic.
407	ThrowNotHardened();
408
409	DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes");
410
411	dng_memory_data sBuffer;
412
413	int aCount = dngString.Get_UTF16 (sBuffer);
414
415	int dBufSize = aCount * 2 + 256;
416
417	dng_memory_data dBuffer (dBufSize);
418
419	int aResult = ::WideCharToMultiByte (encoding,
420										 0,
421										 (WCHAR *) sBuffer.Buffer (),
422										 aCount,
423										 dBuffer.Buffer_char (),
424										 dBufSize,
425										 NULL,
426										 NULL);
427
428	if (aResult < 0)
429		aResult = 0;
430
431	buffer.Allocate (aResult + 1);
432
433	memcpy (buffer.Buffer (),
434			dBuffer.Buffer (),
435			aResult);
436
437	buffer.Buffer_char () [aResult] = 0;
438
439	return (uint32) aResult;
440
441	}
442
443static void Assign_SystemEncoding (dng_string &dngString,
444							       const char *otherString)
445	{
446
447	Assign_Multibyte (dngString,
448					  otherString,
449					  ::GetACP ());
450
451	}
452
453static uint32 Extract_SystemEncoding (const dng_string &dngString,
454							   		  dng_memory_data &buffer)
455	{
456
457	return Extract_Multibyte (dngString,
458					   		  buffer,
459					   		  ::GetACP ());
460
461	}
462
463static void Assign_JIS_X208_1990 (dng_string &dngString,
464							      const char *otherString)
465	{
466
467	// From MSDN documentation: 20932 = JIS X 0208-1990 & 0121-1990
468
469	const UINT kJIS = 20932;
470
471	Assign_Multibyte (dngString,
472					  otherString,
473					  kJIS);
474
475	}
476
477#endif
478
479/*****************************************************************************/
480
// Returns true if the zero-terminated string 's' contains no byte with the
// high bit (0x80) set.  A NULL pointer is reported as ASCII.
static bool IsASCII (const char *s)
	{

	if (s == NULL)
		{
		return true;
		}

	for (const char *cursor = s; *cursor != 0; ++cursor)
		{

		const uint8 byte = (uint8) *cursor;

		if ((byte & 0x80) != 0)
			{
			return false;
			}

		}

	return true;

	}
515
516/*****************************************************************************/
517
518dng_string::dng_string ()
519
520	:	fData ()
521
522	{
523
524	}
525
526/*****************************************************************************/
527
528dng_string::dng_string (const dng_string &s)
529
530	:	fData ()
531
532	{
533
534	Set (s.Get ());
535
536	}
537
538/*****************************************************************************/
539
540dng_string & dng_string::operator= (const dng_string &s)
541	{
542
543	if (this != &s)
544		{
545
546		Set (s.Get ());
547
548		}
549
550	return *this;
551
552	}
553
554/*****************************************************************************/
555
556dng_string::~dng_string ()
557	{
558
559	}
560
561/*****************************************************************************/
562
563const char * dng_string::Get () const
564	{
565
566	if (fData.Buffer ())
567		{
568
569		return fData.Buffer_char ();
570
571		}
572
573	return "";
574
575	}
576
577/*****************************************************************************/
578
579bool dng_string::IsASCII () const
580	{
581
582	return ::IsASCII (Get ());
583
584	}
585
586/*****************************************************************************/
587
588void dng_string::Set (const char *s)
589	{
590
591	// Measure the new length.
592
593	uint32 newLen = (s != NULL ? strlenAsUint32 (s) : 0);
594
595	// If it is a NULL string, then clear the buffer.
596
597	if (newLen == 0)
598		{
599
600		fData.Clear ();
601
602		}
603
604	// Else we need to copy the bytes.
605
606	else
607		{
608
609		uint32 oldLen = Length ();
610
611		// We might be setting this string to a sub-string of itself,
612		// so don't reallocate the data unless the string is getting
613		// longer.
614
615		if (newLen > oldLen)
616			{
617
618			fData.Clear ();
619
620			fData.Allocate (SafeUint32Add (newLen, 1));
621
622			}
623
624		char *d = fData.Buffer_char ();
625
626		for (uint32 k = 0; k <= newLen; k++)
627			{
628
629			d [k] = s [k];
630
631			}
632
633		}
634
635	}
636
637/*****************************************************************************/
638
639void dng_string::Set_ASCII (const char *s)
640	{
641
642	if (::IsASCII (s))
643		{
644
645		Set (s);
646
647		}
648
649	else
650		{
651
652		Set_SystemEncoding (s);
653
654		}
655
656	}
657
658/*****************************************************************************/
659
660void dng_string::Set_UTF8 (const char *s)
661	{
662
663	uint32 len = strlenAsUint32 (s);
664
665	const char *sEnd = s + len;
666
667	// Worst case expansion is 1-byte characters expanding to
668	// replacement character, which requires 3 bytes.
669
670	const uint32 destBufferLength = SafeUint32Add (SafeUint32Mult (len, 3), 1);
671	dng_memory_data buffer (destBufferLength);
672
673	uint8 *d = buffer.Buffer_uint8 ();
674	uint8 * const destEnd = d + destBufferLength;
675
676	while (s < sEnd)
677		{
678
679		uint32 aChar = DecodeUTF8 (s, (uint32) (sEnd - s));
680
681		if (aChar > 0x7FFFFFFF)
682			{
683			aChar = kREPLACEMENT_CHARACTER;
684			}
685
686		#if qDNGValidate
687
688		if (aChar == kREPLACEMENT_CHARACTER)
689			{
690			ReportWarning ("Expected UTF-8 value is not valid UTF-8 (or contains a kREPLACEMENT_CHARACTER)");
691			}
692
693		#endif
694
695		if (aChar < 0x00000080)
696			{
697			CheckSpaceLeftInBuffer (d, destEnd, 1);
698			*(d++) = (uint8) aChar;
699			}
700
701		else if (aChar < 0x00000800)
702			{
703			CheckSpaceLeftInBuffer (d, destEnd, 2);
704			*(d++) = (uint8) ((aChar >> 6) | 0x000000C0);
705			*(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080);
706			}
707
708		else if (aChar < 0x00010000)
709			{
710			CheckSpaceLeftInBuffer (d, destEnd, 3);
711			*(d++) = (uint8) ( (aChar >> 12) | 0x000000E0);
712			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
713			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
714			}
715
716		else if (aChar < 0x00200000)
717			{
718			CheckSpaceLeftInBuffer (d, destEnd, 4);
719			*(d++) = (uint8) ( (aChar >> 18) | 0x000000F0);
720			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
721			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
722			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
723			}
724
725		else if (aChar < 0x04000000)
726			{
727			CheckSpaceLeftInBuffer (d, destEnd, 5);
728			*(d++) = (uint8) ( (aChar >> 24) | 0x000000F8);
729			*(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
730			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
731			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
732			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
733			}
734
735		else
736			{
737			CheckSpaceLeftInBuffer (d, destEnd, 6);
738			*(d++) = (uint8) ( (aChar >> 30) | 0x000000FC);
739			*(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080);
740			*(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
741			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
742			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
743			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
744			}
745
746		}
747
748	CheckSpaceLeftInBuffer (d, destEnd, 1);
749	*d = 0;
750
751	Set (buffer.Buffer_char ());
752
753	}
754
755/*****************************************************************************/
756
757uint32 dng_string::Get_SystemEncoding (dng_memory_data &buffer) const
758	{
759
760	if (IsASCII ())
761		{
762
763		uint32 len = Length ();
764
765		const uint32 destBufferLength = SafeUint32Add (len, 1);
766		buffer.Allocate (destBufferLength);
767
768		memcpy (buffer.Buffer (), Get (), destBufferLength);
769
770		return len;
771
772		}
773
774	else
775		{
776
777		#if qMacOS || qWinOS
778
779		return Extract_SystemEncoding (*this, buffer);
780
781		#else
782
783		// Fallback logic to force the string to ASCII.
784
785		dng_string temp (*this);
786
787		temp.ForceASCII ();
788
789		return temp.Get_SystemEncoding (buffer);
790
791		#endif
792
793		}
794
795	}
796
797/*****************************************************************************/
798
799void dng_string::Set_SystemEncoding (const char *s)
800	{
801
802	if (::IsASCII (s))
803		{
804
805		Set (s);
806
807		}
808
809	else
810		{
811
812		#if qMacOS || qWinOS
813
814		Assign_SystemEncoding (*this, s);
815
816		#else
817
818		// Fallback logic that just grabs the ASCII characters and
819		// ignores the non-ASCII characters.
820
821		uint32 len = strlenAsUint32 (s);
822
823		const uint32 destBufferLength = SafeUint32Add (len, 1);
824		dng_memory_data buffer (destBufferLength);
825
826		uint8 *d = buffer.Buffer_uint8 ();
827		uint8 * const destEnd = d + destBufferLength;
828
829		while (*s)
830			{
831
832			uint8 c = (uint8) *(s++);
833
834			if ((c & 0x80) == 0)
835				{
836
837				CheckSpaceLeftInBuffer (d, destEnd, 1);
838				*(d++) = c;
839
840				}
841
842			}
843
844		CheckSpaceLeftInBuffer (d, destEnd, 1);
845		*d = 0;
846
847		Set (buffer.Buffer_char ());
848
849		#endif
850
851		}
852
853	}
854
855/*****************************************************************************/
856
857bool dng_string::ValidSystemEncoding () const
858	{
859
860	if (IsASCII ())
861		{
862
863		return true;
864
865		}
866
867	dng_memory_data buffer;
868
869	Get_SystemEncoding (buffer);
870
871	dng_string temp;
872
873	temp.Set_SystemEncoding (buffer.Buffer_char ());
874
875	return (*this == temp);
876
877	}
878
879/*****************************************************************************/
880
881void dng_string::Set_JIS_X208_1990 (const char *s)
882	{
883
884	if (::IsASCII (s))
885		{
886
887		Set (s);
888
889		}
890
891	else
892		{
893
894		#if qMacOS || qWinOS
895
896		Assign_JIS_X208_1990 (*this, s);
897
898		#else
899
900		// Fallback to the ASCII extraction logic.
901
902		Set_SystemEncoding (s);
903
904		#endif
905
906		}
907
908	}
909
910/*****************************************************************************/
911
912uint32 dng_string::DecodeUTF8 (const char *&s,
913							   uint32 maxBytes,
914							   bool *isValid)
915	{
916
917	static const uint8 gUTF8Bytes [256] =
918		{
919		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
920		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
921		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
922		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
923		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
924		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
925		2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
926		3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0
927		};
928
929	if (isValid)
930		{
931		*isValid = true;
932		}
933
934	const uint8 *nBuf = (const uint8 *) s;
935
936	uint32 aChar = nBuf [0];
937
938	uint32 aSize = gUTF8Bytes [aChar];
939
940	if (aSize > maxBytes)
941		{
942
943		s += maxBytes;
944
945		if (isValid)
946			{
947			*isValid = false;
948			}
949
950		return kREPLACEMENT_CHARACTER;
951
952		}
953
954	s += aSize;
955
956	for (uint32 extra = 1; extra < aSize; extra++)
957		{
958
959		if ((nBuf [extra] & 0xC0) != 0x80)
960			{
961
962			if (isValid)
963				{
964				*isValid = false;
965				}
966
967			return kREPLACEMENT_CHARACTER;
968
969			}
970
971		}
972
973	switch (aSize)
974		{
975
976		case 0:
977			{
978
979			s++;		// Don't get stuck in infinite loop
980
981			if (isValid)
982				{
983				*isValid = false;
984				}
985
986			return kREPLACEMENT_CHARACTER;
987
988			}
989
990		case 1:
991			{
992
993			return aChar;
994
995			}
996
997		case 2:
998			{
999
1000			aChar = ((aChar << 6) + nBuf [1]) - (uint32) 0x00003080UL;
1001
1002			break;
1003
1004			}
1005
1006		case 3:
1007			{
1008
1009			aChar =  ((((aChar << 6) + nBuf [1])
1010							   << 6) + nBuf [2]) - (uint32) 0x000E2080UL;
1011
1012			break;
1013
1014			}
1015
1016		case 4:
1017			{
1018
1019			aChar = ((((((aChar << 6) + nBuf [1])
1020							    << 6) + nBuf [2])
1021								<< 6) + nBuf [3]) - (uint32) 0x03C82080UL;
1022
1023			break;
1024
1025			}
1026		}
1027
1028	if (aChar < 0x7F || aChar > 0x0010FFFF)
1029		{
1030
1031		if (isValid)
1032			{
1033			*isValid = false;
1034			}
1035
1036		return kREPLACEMENT_CHARACTER;
1037
1038		}
1039
1040	return aChar;
1041
1042	}
1043
1044/*****************************************************************************/
1045
1046bool dng_string::IsUTF8 (const char *s)
1047	{
1048
1049	uint32 len = strlenAsUint32 (s);
1050
1051	const char *sEnd = s + len;
1052
1053	while (s < sEnd)
1054		{
1055
1056		bool isValid = true;
1057
1058		(void) DecodeUTF8 (s, (uint32) (sEnd - s), &isValid);
1059
1060		if (!isValid)
1061			{
1062			return false;
1063			}
1064
1065		}
1066
1067	return true;
1068
1069	}
1070
1071/*****************************************************************************/
1072
1073void dng_string::Set_UTF8_or_System (const char *s)
1074	{
1075
1076	if (::IsASCII (s))
1077		{
1078
1079		Set (s);
1080
1081		}
1082
1083	else if (IsUTF8 (s))
1084		{
1085
1086		Set_UTF8 (s);
1087
1088		}
1089
1090	else
1091		{
1092
1093		Set_SystemEncoding (s);
1094
1095		}
1096
1097	}
1098
1099/*****************************************************************************/
1100
1101uint32 dng_string::Get_UTF16 (dng_memory_data &buffer) const
1102	{
1103
1104	uint32 count = 0;
1105
1106	const char *sPtr = Get ();
1107
1108	while (*sPtr)
1109		{
1110
1111		uint32 x = DecodeUTF8 (sPtr);
1112
1113		if (x <= 0x0000FFFF ||
1114			x >  0x0010FFFF)
1115			{
1116
1117			count = SafeUint32Add (count, 1);
1118
1119			}
1120
1121		else
1122			{
1123
1124			count = SafeUint32Add (count, 2);
1125
1126			}
1127
1128		}
1129
1130	const uint32 destBufferLength = SafeUint32Add (count, 1);
1131	buffer.Allocate (destBufferLength, sizeof (uint16));
1132
1133	uint16 *dPtr = buffer.Buffer_uint16 ();
1134	uint16 * const destEnd = dPtr + destBufferLength;
1135
1136	sPtr = Get ();
1137
1138	while (*sPtr)
1139		{
1140
1141		uint32 x = DecodeUTF8 (sPtr);
1142
1143		if (x <= 0x0000FFFF)
1144			{
1145
1146			CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1147			*(dPtr++) = (uint16) x;
1148
1149			}
1150
1151		else if (x > 0x0010FFFF)
1152			{
1153
1154			CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1155			*(dPtr++) = (uint16) kREPLACEMENT_CHARACTER;
1156
1157			}
1158
1159		else
1160			{
1161
1162			x -= 0x00010000;
1163
1164			CheckSpaceLeftInBuffer (dPtr, destEnd, 2);
1165			*(dPtr++) = (uint16) ((x >> 10       ) + 0x0000D800);
1166			*(dPtr++) = (uint16) ((x & 0x000003FF) + 0x0000DC00);
1167
1168			}
1169
1170		}
1171
1172	CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1173	*dPtr = 0;
1174
1175	return count;
1176
1177	}
1178
1179/*****************************************************************************/
1180
1181void dng_string::Set_UTF16 (const uint16 *s)
1182	{
1183
1184	if (!s)
1185		{
1186		Clear ();
1187		return;
1188		}
1189
1190	bool swap = false;
1191
1192	if (s [0] == 0xFFFE)		// Swapped byte order marker
1193		{
1194		swap = true;
1195		s++;
1196		}
1197
1198	else if (s [0] == 0xFEFF)	// Non-swapped byte order marker
1199		{
1200		s++;
1201		}
1202
1203	uint32 length16 = 0;
1204
1205	while (s [length16] != 0)
1206		{
1207		length16 = SafeUint32Add (length16, 1);
1208		}
1209
1210	const uint16 *sEnd = s + length16;
1211
1212	const uint32 destBufferSize =
1213		SafeUint32Add (SafeUint32Mult (length16, 6), 1);
1214	dng_memory_data buffer (destBufferSize);
1215
1216	uint8 *d = buffer.Buffer_uint8 ();
1217	uint8 * const destEnd = d + destBufferSize;
1218
1219	while (s < sEnd)
1220		{
1221
1222		uint32 aChar = *s++;
1223
1224		if (swap)
1225			{
1226			aChar = ((aChar << 8) | (aChar >> 8)) & 0x0000FFFF;
1227			}
1228
1229		if ((aChar >= 0x0000D800) && (aChar <= 0x0000DBFF) && (s < sEnd))
1230			{
1231
1232			uint32 aLow = *s;
1233
1234			if (swap)
1235				{
1236				aLow = ((aLow << 8) | (aLow >> 8)) & 0x0000FFFF;
1237				}
1238
1239			if ((aLow >= 0x0000DC00) && (aLow <= 0x0000DFFF))
1240				{
1241
1242				aChar = ((aChar - 0x0000D800) << 10) +
1243					    (aLow - 0x0000DC00) +
1244					    0x00010000;
1245
1246				s++;
1247
1248				}
1249
1250			}
1251
1252		if (aChar > 0x7FFFFFFF)
1253			{
1254			aChar = kREPLACEMENT_CHARACTER;
1255			}
1256
1257		if (aChar < 0x00000080)
1258			{
1259			CheckSpaceLeftInBuffer (d, destEnd, 1);
1260			*(d++) = (uint8) aChar;
1261			}
1262
1263		else if (aChar < 0x00000800)
1264			{
1265			CheckSpaceLeftInBuffer (d, destEnd, 2);
1266			*(d++) = (uint8) ((aChar >> 6) | 0x000000C0);
1267			*(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080);
1268			}
1269
1270		else if (aChar < 0x00010000)
1271			{
1272			CheckSpaceLeftInBuffer (d, destEnd, 3);
1273			*(d++) = (uint8) ( (aChar >> 12) | 0x000000E0);
1274			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1275			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1276			}
1277
1278		else if (aChar < 0x00200000)
1279			{
1280			CheckSpaceLeftInBuffer (d, destEnd, 4);
1281			*(d++) = (uint8) ( (aChar >> 18) | 0x000000F0);
1282			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1283			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1284			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1285			}
1286
1287		else if (aChar < 0x04000000)
1288			{
1289			CheckSpaceLeftInBuffer (d, destEnd, 5);
1290			*(d++) = (uint8) ( (aChar >> 24) | 0x000000F8);
1291			*(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
1292			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1293			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1294			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1295			}
1296
1297		else
1298			{
1299			CheckSpaceLeftInBuffer (d, destEnd, 6);
1300			*(d++) = (uint8) ( (aChar >> 30) | 0x000000FC);
1301			*(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080);
1302			*(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
1303			*(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1304			*(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1305			*(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1306			}
1307
1308		}
1309
1310	CheckSpaceLeftInBuffer (d, destEnd, 1);
1311	*d = 0;
1312
1313	Set (buffer.Buffer_char ());
1314
1315	}
1316
1317/*****************************************************************************/
1318
1319void dng_string::Clear ()
1320	{
1321
1322	Set (NULL);
1323
1324	}
1325
1326/*****************************************************************************/
1327
1328void dng_string::Truncate (uint32 maxBytes)
1329	{
1330
1331	uint32 len = Length ();
1332
1333	if (len > maxBytes)
1334		{
1335
1336		uint8 *s = fData.Buffer_uint8 ();
1337
1338		// Don't truncate on an extension character.  Extensions characters
1339		// in UTF-8 have the 0x80 bit set and the 0x40 bit clear.
1340
1341		while (maxBytes > 0 && ((s [maxBytes]) & 0xC0) == 0x80)
1342			{
1343
1344			maxBytes--;
1345
1346			}
1347
1348		s [maxBytes] = 0;
1349
1350		}
1351
1352	}
1353
1354/*****************************************************************************/
1355
1356bool dng_string::TrimTrailingBlanks ()
1357	{
1358
1359	bool didTrim = false;
1360
1361	if (fData.Buffer ())
1362		{
1363
1364		char *s = fData.Buffer_char ();
1365
1366		uint32 len = strlenAsUint32 (s);
1367
1368		while (len > 0 && s [len - 1] == ' ')
1369			{
1370			len--;
1371			didTrim = true;
1372			}
1373
1374		s [len] = 0;
1375
1376		}
1377
1378	return didTrim;
1379
1380	}
1381
1382/*****************************************************************************/
1383
1384bool dng_string::TrimLeadingBlanks ()
1385	{
1386
1387	bool didTrim = false;
1388
1389	const char *s = Get ();
1390
1391	while (*s == ' ')
1392		{
1393		s++;
1394		didTrim = true;
1395		}
1396
1397	if (didTrim)
1398		{
1399		Set (s);
1400		}
1401
1402	return didTrim;
1403
1404	}
1405
1406/*****************************************************************************/
1407
1408bool dng_string::IsEmpty () const
1409	{
1410
1411	const char *s = Get ();
1412
1413	return *s == 0;
1414
1415	}
1416
1417/*****************************************************************************/
1418
1419uint32 dng_string::Length () const
1420	{
1421
1422	const char *s = Get ();
1423
1424	return strlenAsUint32 (s);
1425
1426	}
1427
1428/*****************************************************************************/
1429
1430bool dng_string::operator== (const dng_string &s) const
1431	{
1432
1433	const char *s1 =   Get ();
1434	const char *s2 = s.Get ();
1435
1436	return strcmp (s1, s2) == 0;
1437
1438	}
1439
1440/*****************************************************************************/
1441
1442bool dng_string::Matches (const char *t,
1443						  const char *s,
1444						  bool case_sensitive)
1445	{
1446
1447	while (*s != 0)
1448		{
1449
1450		char c1 = *(s++);
1451		char c2 = *(t++);
1452
1453		if (!case_sensitive)
1454			{
1455			c1 = ForceUppercase (c1);
1456			c2 = ForceUppercase (c2);
1457			}
1458
1459		if (c1 != c2)
1460			{
1461			return false;
1462			}
1463
1464		}
1465
1466	return (*t == 0);
1467
1468	}
1469
1470/*****************************************************************************/
1471
1472bool dng_string::Matches (const char *s,
1473						  bool case_sensitive) const
1474	{
1475
1476	return dng_string::Matches (Get (), s, case_sensitive);
1477
1478	}
1479
1480/*****************************************************************************/
1481
1482bool dng_string::StartsWith (const char *s,
1483						     bool case_sensitive) const
1484	{
1485
1486	const char *t = Get ();
1487
1488	while (*s != 0)
1489		{
1490
1491		char c1 = *(s++);
1492		char c2 = *(t++);
1493
1494		if (!case_sensitive)
1495			{
1496			c1 = ForceUppercase (c1);
1497			c2 = ForceUppercase (c2);
1498			}
1499
1500		if (c1 != c2)
1501			{
1502			return false;
1503			}
1504
1505		}
1506
1507	return true;
1508
1509	}
1510
1511/*****************************************************************************/
1512
1513bool dng_string::EndsWith (const char *s,
1514						   bool case_sensitive) const
1515	{
1516
1517	uint32 len1 = Length ();
1518
1519	uint32 len2 = strlenAsUint32 (s);
1520
1521	if (len1 < len2)
1522		{
1523		return false;
1524		}
1525
1526	const char *t = Get () + (len1 - len2);
1527
1528	while (*s != 0)
1529		{
1530
1531		char c1 = *(s++);
1532		char c2 = *(t++);
1533
1534		if (!case_sensitive)
1535			{
1536			c1 = ForceUppercase (c1);
1537			c2 = ForceUppercase (c2);
1538			}
1539
1540		if (c1 != c2)
1541			{
1542			return false;
1543			}
1544
1545		}
1546
1547	return true;
1548
1549	}
1550
1551/*****************************************************************************/
1552
1553bool dng_string::Contains (const char *s,
1554						   bool case_sensitive,
1555						   int32 *match_offset) const
1556	{
1557
1558	if (match_offset)
1559		{
1560		*match_offset = -1;
1561		}
1562
1563	uint32 len1 = Length ();
1564
1565	uint32 len2 = strlenAsUint32 (s);
1566
1567	if (len1 < len2)
1568		{
1569		return false;
1570		}
1571
1572	uint32 offsets = len1 - len2;
1573
1574	for (uint32 offset = 0; offset <= offsets; offset++)
1575		{
1576
1577		const char *ss = s;
1578		const char *tt = Get () + offset;
1579
1580		while (*ss != 0)
1581			{
1582
1583			char c1 = *(ss++);
1584			char c2 = *(tt++);
1585
1586			if (!case_sensitive)
1587				{
1588				c1 = ForceUppercase (c1);
1589				c2 = ForceUppercase (c2);
1590				}
1591
1592			if (c1 != c2)
1593				{
1594				goto tryNextOffset;
1595				}
1596
1597			}
1598
1599		if (match_offset)
1600			{
1601			*match_offset = offset;
1602			}
1603
1604		return true;
1605
1606		tryNextOffset:	;
1607
1608		}
1609
1610	return false;
1611
1612	}
1613
1614/*****************************************************************************/
1615
1616bool dng_string::Replace (const char *old_string,
1617						  const char *new_string,
1618						  bool case_sensitive)
1619	{
1620
1621	int32 match_offset = -1;
1622
1623	if (Contains (old_string,
1624				  case_sensitive,
1625				  &match_offset))
1626		{
1627
1628		uint32 len1 = Length ();
1629
1630		uint32 len2 = strlenAsUint32 (old_string);
1631		uint32 len3 = strlenAsUint32 (new_string);
1632
1633		if (len2 == len3)
1634			{
1635
1636			strncpy (fData.Buffer_char () + match_offset,
1637					 new_string,
1638					 len3);
1639
1640			}
1641
1642		else if (len2 > len3)
1643			{
1644
1645			strncpy (fData.Buffer_char () + match_offset,
1646					 new_string,
1647					 len3);
1648
1649			const char *s = fData.Buffer_char () + match_offset + len2;
1650				  char *d = fData.Buffer_char () + match_offset + len3;
1651
1652			uint32 extra = len1 - match_offset - len2 + 1;	// + 1 for NULL termination
1653
1654			for (uint32 j = 0; j < extra; j++)
1655				{
1656				*(d++) = *(s++);
1657				}
1658
1659			}
1660
1661		else
1662			{
1663
1664			// "len1 - len2" cannot wrap around because we know that if this
1665			// string contains old_string, len1 >= len2 must hold.
1666			dng_memory_data tempBuffer (
1667				SafeUint32Add (SafeUint32Add (len1 - len2, len3), 1));
1668
1669			if (match_offset)
1670				{
1671
1672				strncpy (tempBuffer.Buffer_char (),
1673						 fData     .Buffer_char (),
1674						 match_offset);
1675
1676				}
1677
1678			if (len3)
1679				{
1680
1681				strncpy (tempBuffer.Buffer_char () + match_offset,
1682						 new_string,
1683						 len3);
1684
1685				}
1686
1687			uint32 extra = len1 - match_offset - len2 + 1;	// + 1 for NULL termination
1688
1689			strncpy (tempBuffer.Buffer_char () + match_offset + len3,
1690					 fData     .Buffer_char () + match_offset + len2,
1691					 extra);
1692
1693			Set (tempBuffer.Buffer_char ());
1694
1695			}
1696
1697		return true;
1698
1699		}
1700
1701	return false;
1702
1703	}
1704
1705/*****************************************************************************/
1706
1707bool dng_string::TrimLeading (const char *s,
1708						      bool case_sensitive)
1709	{
1710
1711	if (StartsWith (s, case_sensitive))
1712		{
1713
1714		Set (Get () + strlenAsUint32 (s));
1715
1716		return true;
1717
1718		}
1719
1720	return false;
1721
1722	}
1723
1724/*****************************************************************************/
1725
1726void dng_string::Append (const char *s)
1727	{
1728
1729	uint32 len2 = strlenAsUint32 (s);
1730
1731	if (len2)
1732		{
1733
1734		uint32 len1 = Length ();
1735
1736		dng_memory_data temp (SafeUint32Add (SafeUint32Add (len1, len2), 1));
1737
1738		char *buffer = temp.Buffer_char ();
1739
1740		if (len1)
1741			{
1742			memcpy (buffer, Get (), len1);
1743			}
1744
1745		memcpy (buffer + len1, s, len2 + 1);
1746
1747		Set (buffer);
1748
1749		}
1750
1751	}
1752
1753/*****************************************************************************/
1754
1755void dng_string::SetUppercase ()
1756	{
1757
1758	if (fData.Buffer ())
1759		{
1760
1761		uint32 len = Length ();
1762
1763		char *dPtr = fData.Buffer_char ();
1764
1765		for (uint32 j = 0; j < len; j++)
1766			{
1767
1768			char c = dPtr [j];
1769
1770			if (c >= 'a' && c <= 'z')
1771				{
1772
1773				dPtr [j] = c - 'a' + 'A';
1774
1775				}
1776
1777			}
1778
1779		}
1780
1781	}
1782
1783/*****************************************************************************/
1784
1785void dng_string::SetLowercase ()
1786	{
1787
1788	if (fData.Buffer ())
1789		{
1790
1791		uint32 len = Length ();
1792
1793		char *dPtr = fData.Buffer_char ();
1794
1795		for (uint32 j = 0; j < len; j++)
1796			{
1797
1798			char c = dPtr [j];
1799
1800			if (c >= 'A' && c <= 'Z')
1801				{
1802
1803				dPtr [j] = c - 'A' + 'a';
1804
1805				}
1806
1807			}
1808
1809		}
1810
1811	}
1812
1813/*****************************************************************************/
1814
1815void dng_string::SetLineEndings (char ending)
1816	{
1817
1818	if (fData.Buffer ())
1819		{
1820
1821		const char *sPtr = fData.Buffer_char ();
1822		      char *dPtr = fData.Buffer_char ();
1823
1824		while (*sPtr)
1825			{
1826
1827			char c = *(sPtr++);
1828
1829			char nc = sPtr [0];
1830
1831			if ((c == '\r' && nc == '\n') ||
1832				(c == '\n' && nc == '\r'))
1833				{
1834
1835				sPtr++;
1836
1837				if (ending)
1838					{
1839					*(dPtr++) = ending;
1840					}
1841
1842				}
1843
1844			else if (c == '\n' ||
1845					 c == '\r')
1846				{
1847
1848				if (ending)
1849					{
1850					*(dPtr++) = ending;
1851					}
1852
1853				}
1854
1855			else
1856				{
1857
1858				*(dPtr++) = c;
1859
1860				}
1861
1862			}
1863
1864		*dPtr = 0;
1865
1866		}
1867
1868	}
1869
1870/*****************************************************************************/
1871
1872void dng_string::StripLowASCII ()
1873	{
1874
1875	if (fData.Buffer ())
1876		{
1877
1878		const char *sPtr = fData.Buffer_char ();
1879		      char *dPtr = fData.Buffer_char ();
1880
1881		while (*sPtr)
1882			{
1883
1884			char c = *(sPtr++);
1885
1886			if (c == '\r' || c == '\n' || (uint8) c >= ' ')
1887				{
1888
1889				*(dPtr++) = c;
1890
1891				}
1892
1893			}
1894
1895		*dPtr = 0;
1896
1897		}
1898
1899	}
1900
1901/*****************************************************************************/
1902
1903void dng_string::NormalizeAsCommaSeparatedNumbers ()
1904	{
1905
1906	if (fData.Buffer ())
1907		{
1908
1909		const char *sPtr = fData.Buffer_char ();
1910			  char *dPtr = fData.Buffer_char ();
1911
1912		bool commaInserted = false;
1913
1914		while (*sPtr)
1915			{
1916
1917			uint32 c = DecodeUTF8 (sPtr);
1918
1919			// Support number formats such as "3", "+3.0", "-3.1416", "314.16e-2",
1920			// "0.31416E1", but no hex/octal number representations.
1921
1922			if (isdigit ((int) c) || c == '.' || c == '-' || c == '+' || c == 'e' || c == 'E')
1923				{
1924
1925				*(dPtr++) = (char) c;
1926
1927				if (commaInserted)
1928					{
1929
1930					commaInserted = false;
1931
1932					}
1933
1934				}
1935
1936			else if (!commaInserted)
1937				{
1938
1939				*(dPtr++) = ',';
1940
1941				commaInserted = true;
1942
1943				}
1944
1945			}
1946
1947		*dPtr = 0;
1948
1949		}
1950
1951	}
1952
1953/******************************************************************************/
1954
1955// Unicode to low-ASCII strings table.
1956
// One row of the Unicode to low-ASCII substitution table: a Unicode code
// point and the ASCII text that stands in for it.

struct UnicodeToLowASCIIEntry
	{
	// Unicode code point to be replaced.
	uint32 unicode;
	// Zero-terminated ASCII replacement text.
	const char *ascii;
	};
1962
// Substitutions applied by dng_string::ForceASCII to code points above
// 0x007F.  ForceASCII scans this table linearly and writes '?' for any code
// point that has no entry here.  Entries are listed in ascending code point
// order, and the longest replacement is three characters.

static const UnicodeToLowASCIIEntry kUnicodeToLowASCII [] =
	{
	{	0x00A0, " "		},
	{	0x00A1, "!"		},
	{	0x00A9, "(C)"	},
	{	0x00AA, "a"		},
	{	0x00AB, "<<"	},
	{	0x00AC, "!"		},
	{	0x00AE, "(R)"	},
	{	0x00B0, "dg"	},
	{	0x00B1, "+-"	},
	{	0x00B7, "."		},
	{	0x00BA, "o"		},
	{	0x00BB, ">>"	},
	{	0x00BF, "?"		},
	{	0x00C0, "A"		},
	{	0x00C1, "A"		},
	{	0x00C2, "A"		},
	{	0x00C3, "A"		},
	{	0x00C4, "A"		},
	{	0x00C5, "A"		},
	{	0x00C6, "AE"	},
	{	0x00C7, "C"		},
	{	0x00C8, "E"		},
	{	0x00C9, "E"		},
	{	0x00CA, "E"		},
	{	0x00CB, "E"		},
	{	0x00CC, "I"		},
	{	0x00CD, "I"		},
	{	0x00CE, "I"		},
	{	0x00CF, "I"		},
	{	0x00D1, "N"		},
	{	0x00D2, "O"		},
	{	0x00D3, "O"		},
	{	0x00D4, "O"		},
	{	0x00D5, "O"		},
	{	0x00D6, "O"		},
	{	0x00D8, "O"		},
	{	0x00D9, "U"		},
	{	0x00DA, "U"		},
	{	0x00DB, "U"		},
	{	0x00DC, "U"		},
	{	0x00DD, "Y"		},
	{	0x00E0, "a"		},
	{	0x00E1, "a"		},
	{	0x00E2, "a"		},
	{	0x00E3, "a"		},
	{	0x00E4, "a"		},
	{	0x00E5, "a"		},
	{	0x00E6, "ae"	},
	{	0x00E7, "c"		},
	{	0x00E8, "e"		},
	{	0x00E9, "e"		},
	{	0x00EA, "e"		},
	{	0x00EB, "e"		},
	{	0x00EC, "i"		},
	{	0x00ED, "i"		},
	{	0x00EE, "i"		},
	{	0x00EF, "i"		},
	{	0x00F1, "n"		},
	{	0x00F2, "o"		},
	{	0x00F3, "o"		},
	{	0x00F4, "o"		},
	{	0x00F5, "o"		},
	{	0x00F6, "o"		},
	{	0x00F7, "/"		},
	{	0x00F8, "o"		},
	{	0x00F9, "u"		},
	{	0x00FA, "u"		},
	{	0x00FB, "u"		},
	{	0x00FC, "u"		},
	{	0x00FD, "y"		},
	{	0x00FF, "y"		},
	{	0x0131, "i"		},
	{	0x0152, "OE"	},
	{	0x0153, "oe"	},
	{	0x0178, "Y"		},
	{	0x2013, "-"		},
	{	0x2014, "-"		},
	{	0x2018, "'"		},
	{	0x2019, "'"		},
	{	0x201A, ","		},
	{	0x201C, "\""	},
	{	0x201D, "\""	},
	{	0x201E, ",,"	},
	{	0x2022, "."		},
	{	0x2026, "..."	},
	{	0x2039, "<"		},
	{	0x203A, ">"		},
	{	0x2044, "/"		},
	{	0x2122, "TM"	},
	{	0x2206, "d"		},
	{	0x2211, "S"		},
	{	0x2260, "!="	},
	{	0x2264, "<="	},
	{	0x2265, ">="	},
	{	0x2318, "#"		},
	{	0xFB01, "fi"	},
	{	0xFB02, "fl"	}
	};
2063
2064/******************************************************************************/
2065
2066void dng_string::ForceASCII ()
2067	{
2068
2069	if (!IsASCII ())
2070		{
2071
2072		uint32 tempBufferSize =
2073			SafeUint32Add (SafeUint32Mult(Length(), 3), 1);
2074		dng_memory_data tempBuffer (tempBufferSize);
2075
2076		char *dPtr = tempBuffer.Buffer_char ();
2077		char * const destEnd = dPtr + tempBufferSize;
2078
2079		const char *sPtr = Get ();
2080
2081		while (*sPtr)
2082			{
2083
2084			uint32 x = DecodeUTF8 (sPtr);
2085
2086			if (x <= 0x007F)
2087				{
2088
2089				CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2090				*(dPtr++) = (char) x;
2091
2092				}
2093
2094			else
2095				{
2096
2097				const char *ascii = NULL;
2098
2099				const uint32 kTableEntrys = sizeof (kUnicodeToLowASCII    ) /
2100									        sizeof (kUnicodeToLowASCII [0]);
2101
2102				for (uint32 entry = 0; entry < kTableEntrys; entry++)
2103					{
2104
2105					if (kUnicodeToLowASCII [entry] . unicode == x)
2106						{
2107
2108						ascii = kUnicodeToLowASCII [entry] . ascii;
2109
2110						break;
2111
2112						}
2113
2114					}
2115
2116				if (ascii)
2117					{
2118
2119					while (*ascii)
2120						{
2121
2122						CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2123						*(dPtr++) = *(ascii++);
2124
2125						}
2126
2127					}
2128
2129				else
2130					{
2131
2132					CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2133					*(dPtr++) ='?';
2134
2135					}
2136
2137				}
2138
2139			}
2140
2141		CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2142		*dPtr = 0;
2143
2144		Set (tempBuffer.Buffer_char ());
2145
2146		}
2147
2148	}
2149
2150/******************************************************************************/
2151
// Mutex held by dng_string::Compare around its call to UCCompareTextDefault
// on Mac OS, because that call is not thread safe on at least some Mac OS
// versions.

static dng_mutex gProtectUCCalls ("gProtectUCCalls");
2153
2154/******************************************************************************/
2155
2156int32 dng_string::Compare (const dng_string &s) const
2157	{
2158
2159	#if qMacOS
2160	#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
2161
2162		// TODO: Needs implementation.
2163		ThrowProgramError ("Compare() not implemented on iOS");
2164		return 0;
2165
2166	#else
2167
2168		{
2169
2170		dng_memory_data aStrA;
2171		dng_memory_data aStrB;
2172
2173		uint32 aLenA = this->Get_UTF16 (aStrA);
2174		uint32 aLenB = s    .Get_UTF16 (aStrB);
2175
2176		if (aLenA > 0)
2177			{
2178
2179			if (aLenB > 0)
2180				{
2181
2182				// For some Mac OS versions anyway, UCCompareTextDefault is not
2183				// thread safe.
2184
2185				dng_lock_mutex lockMutex (&gProtectUCCalls);
2186
2187				UCCollateOptions aOptions = kUCCollateStandardOptions |
2188											kUCCollatePunctuationSignificantMask;
2189
2190				SInt32 aOrder = -1;
2191
2192				Boolean aEqual = false;
2193
2194				OSStatus searchStatus = ::UCCompareTextDefault (aOptions,
2195																aStrA.Buffer_uint16 (),
2196																aLenA,
2197																aStrB.Buffer_uint16 (),
2198																aLenB,
2199																&aEqual,
2200																&aOrder);
2201
2202				if (searchStatus == noErr)
2203					{
2204
2205					if (aEqual || (aOrder == 0))
2206						{
2207						return 0;
2208						}
2209
2210					else
2211						{
2212						return (aOrder > 0) ? 1 : -1;
2213						}
2214
2215					}
2216
2217				else
2218					{
2219
2220					DNG_REPORT ("UCCompareTextDefault failed");
2221
2222					return -1;
2223
2224					}
2225
2226				}
2227
2228			else
2229				{
2230				return 1;
2231				}
2232
2233			}
2234
2235		else
2236			{
2237
2238			if (aLenB > 0)
2239				{
2240				return -1;
2241				}
2242
2243			else
2244				{
2245				return 0;
2246				}
2247
2248			}
2249
2250		}
2251
2252	#endif  // TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR
2253
2254	#elif qWinOS
2255
2256		{
2257
2258		dng_memory_data aStrA;
2259		dng_memory_data aStrB;
2260
2261		uint32 aLenA = this->Get_UTF16 (aStrA);
2262		uint32 aLenB = s    .Get_UTF16 (aStrB);
2263
2264		if (aLenA > 0)
2265			{
2266
2267			if (aLenB > 0)
2268				{
2269
2270				LCID locale = LOCALE_SYSTEM_DEFAULT;
2271
2272				DWORD aFlags = NORM_IGNOREWIDTH;
2273
2274				int aOrder = ::CompareStringW (locale,
2275											   aFlags,
2276											   (const WCHAR *) aStrA.Buffer_uint16 (),
2277											   aLenA,
2278											   (const WCHAR *) aStrB.Buffer_uint16 (),
2279											   aLenB);
2280
2281				if (aOrder == CSTR_EQUAL)
2282					{
2283					return 0;
2284					}
2285
2286				else if (aOrder == CSTR_GREATER_THAN)
2287					{
2288					return 1;
2289					}
2290
2291				else
2292					{
2293					return -1;
2294					}
2295
2296				}
2297
2298			else
2299				{
2300				return 1;
2301				}
2302
2303			}
2304
2305		else
2306			{
2307
2308			if (aLenB > 0)
2309				{
2310				return -1;
2311				}
2312			else
2313				{
2314				return 0;
2315				}
2316
2317			}
2318
2319		}
2320
2321	#else
2322
2323	// Fallback to a pure Unicode sort order.
2324
2325		{
2326
2327		for (uint32 pass = 0; pass < 2; pass++)
2328			{
2329
2330			const char *aPtr =   Get ();
2331			const char *bPtr = s.Get ();
2332
2333			while (*aPtr || *bPtr)
2334				{
2335
2336				if (!bPtr)
2337					{
2338					return 1;
2339					}
2340
2341				else if (!aPtr)
2342					{
2343					return -1;
2344					}
2345
2346				uint32 a = DecodeUTF8 (aPtr);
2347				uint32 b = DecodeUTF8 (bPtr);
2348
2349				// Ignore case on first compare pass.
2350
2351				if (pass == 0)
2352					{
2353
2354					if (a >= (uint32) 'a' && a <= (uint32) 'z')
2355						{
2356						a = a - (uint32) 'a' + (uint32) 'A';
2357						}
2358
2359					if (b >= (uint32) 'a' && b <= (uint32) 'z')
2360						{
2361						b = b - (uint32) 'a' + (uint32) 'A';
2362						}
2363
2364					}
2365
2366				if (b > a)
2367					{
2368					return 1;
2369					}
2370
2371				else if (a < b)
2372					{
2373					return -1;
2374					}
2375
2376				}
2377
2378			}
2379
2380		}
2381
2382	#endif
2383
2384	return 0;
2385
2386	}
2387
2388/*****************************************************************************/
2389