1/*
2www.sourceforge.net/projects/tinyxml
3Original code by Lee Thomason (www.grinninglizard.com)
4
5This software is provided 'as-is', without any express or implied
6warranty. In no event will the authors be held liable for any
7damages arising from the use of this software.
8
9Permission is granted to anyone to use this software for any
10purpose, including commercial applications, and to alter it and
11redistribute it freely, subject to the following restrictions:
12
131. The origin of this software must not be misrepresented; you must
14not claim that you wrote the original software. If you use this
15software in a product, an acknowledgment in the product documentation
16would be appreciated but is not required.
17
182. Altered source versions must be plainly marked as such, and
19must not be misrepresented as being the original software.
20
213. This notice may not be removed or altered from any source
22distribution.
23*/
24
25#include <ctype.h>
26#include <stddef.h>
27
28#include "tinyxml.h"
29
//#define DEBUG_PARSER
// Parser tracing. When DEBUG_PARSER is defined, TIXML_LOG is mapped to
// OutputDebugString for MSVC builds that also define DEBUG, and to printf
// for every other build. When it is not defined, TIXML_LOG does not exist.
#ifdef DEBUG_PARSER
#	if defined( _MSC_VER ) && defined( DEBUG )
#		include <windows.h>
#		define TIXML_LOG OutputDebugString
#	else
#		define TIXML_LOG printf
#	endif
#endif
39
40// Note tha "PutString" hardcodes the same list. This
41// is less flexible than it appears. Changing the entries
42// or order will break putstring.
43TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
44{
45	{ "&amp;",  5, '&' },
46	{ "&lt;",   4, '<' },
47	{ "&gt;",   4, '>' },
48	{ "&quot;", 6, '\"' },
49	{ "&apos;", 6, '\'' }
50};
51
// A good collection of Unicode background lives at:
//		http://www.unicode.org/faq/utf_bom.html
// It is also the basis of the lead-byte table in this file, which gives the
// number of bytes in a sequence from its first byte. Invalid lead bytes map
// to 1 -- the result will be junk, but as much as possible is passed through.
// Beware of these three-byte sequences, which this parser treats specially:
//				ef bb bf (Microsoft "lead bytes")
//				ef bf be
//				ef bf bf

const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
65
66const int TiXmlBase::utf8ByteTable[256] =
67{
68	//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
69		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x00
70		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x10
71		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x20
72		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x30
73		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x40
74		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x50
75		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x60
76		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x70	End of ASCII range
77		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x80 0x80 to 0xc1 invalid
78		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x90
79		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xa0
80		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xb0
81		1,	1,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xc0 0xc2 to 0xdf 2 byte
82		2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xd0
83		3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	// 0xe0 0xe0 to 0xef 3 byte
84		4,	4,	4,	4,	4,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1	// 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
85};
86
87
88void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
89{
90	const unsigned long BYTE_MASK = 0xBF;
91	const unsigned long BYTE_MARK = 0x80;
92	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
93
94	if (input < 0x80)
95		*length = 1;
96	else if ( input < 0x800 )
97		*length = 2;
98	else if ( input < 0x10000 )
99		*length = 3;
100	else if ( input < 0x200000 )
101		*length = 4;
102	else
103		{ *length = 0; return; }	// This code won't covert this correctly anyway.
104
105	output += *length;
106
107	// Scary scary fall throughs.
108	switch (*length)
109	{
110		case 4:
111			--output;
112			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
113			input >>= 6;
114		case 3:
115			--output;
116			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
117			input >>= 6;
118		case 2:
119			--output;
120			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
121			input >>= 6;
122		case 1:
123			--output;
124			*output = (char)(input | FIRST_BYTE_MARK[*length]);
125	}
126}
127
128
129/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
130{
131	// This will only work for low-ascii, everything else is assumed to be a valid
132	// letter. I'm not sure this is the best approach, but it is quite tricky trying
133	// to figure out alhabetical vs. not across encoding. So take a very
134	// conservative approach.
135
136//	if ( encoding == TIXML_ENCODING_UTF8 )
137//	{
138		if ( anyByte < 127 )
139			return isalpha( anyByte );
140		else
141			return 1;	// What else to do? The unicode set is huge...get the english ones right.
142//	}
143//	else
144//	{
145//		return isalpha( anyByte );
146//	}
147}
148
149
150/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
151{
152	// This will only work for low-ascii, everything else is assumed to be a valid
153	// letter. I'm not sure this is the best approach, but it is quite tricky trying
154	// to figure out alhabetical vs. not across encoding. So take a very
155	// conservative approach.
156
157//	if ( encoding == TIXML_ENCODING_UTF8 )
158//	{
159		if ( anyByte < 127 )
160			return isalnum( anyByte );
161		else
162			return 1;	// What else to do? The unicode set is huge...get the english ones right.
163//	}
164//	else
165//	{
166//		return isalnum( anyByte );
167//	}
168}
169
170
171class TiXmlParsingData
172{
173	friend class TiXmlDocument;
174  public:
175	void Stamp( const char* now, TiXmlEncoding encoding );
176
177	const TiXmlCursor& Cursor() const	{ return cursor; }
178
179  private:
180	// Only used by the document!
181	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
182	{
183		assert( start );
184		stamp = start;
185		tabsize = _tabsize;
186		cursor.row = row;
187		cursor.col = col;
188	}
189
190	TiXmlCursor		cursor;
191	const char*		stamp;
192	int				tabsize;
193};
194
195
196void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
197{
198	assert( now );
199
200	// Do nothing if the tabsize is 0.
201	if ( tabsize < 1 )
202	{
203		return;
204	}
205
206	// Get the current row, column.
207	int row = cursor.row;
208	int col = cursor.col;
209	const char* p = stamp;
210	assert( p );
211
212	while ( p < now )
213	{
214		// Treat p as unsigned, so we have a happy compiler.
215		const unsigned char* pU = (const unsigned char*)p;
216
217		// Code contributed by Fletcher Dunn: (modified by lee)
218		switch (*pU) {
219			case 0:
220				// We *should* never get here, but in case we do, don't
221				// advance past the terminating null character, ever
222				return;
223
224			case '\r':
225				// bump down to the next line
226				++row;
227				col = 0;
228				// Eat the character
229				++p;
230
231				// Check for \r\n sequence, and treat this as a single character
232				if (*p == '\n') {
233					++p;
234				}
235				break;
236
237			case '\n':
238				// bump down to the next line
239				++row;
240				col = 0;
241
242				// Eat the character
243				++p;
244
245				// Check for \n\r sequence, and treat this as a single
246				// character.  (Yes, this bizarre thing does occur still
247				// on some arcane platforms...)
248				if (*p == '\r') {
249					++p;
250				}
251				break;
252
253			case '\t':
254				// Eat the character
255				++p;
256
257				// Skip to next tab stop
258				col = (col / tabsize + 1) * tabsize;
259				break;
260
261			case TIXML_UTF_LEAD_0:
262				if ( encoding == TIXML_ENCODING_UTF8 )
263				{
264					if ( *(p+1) && *(p+2) )
265					{
266						// In these cases, don't advance the column. These are
267						// 0-width spaces.
268						if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
269							p += 3;
270						else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
271							p += 3;
272						else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
273							p += 3;
274						else
275							{ p +=3; ++col; }	// A normal character.
276					}
277				}
278				else
279				{
280					++p;
281					++col;
282				}
283				break;
284
285			default:
286				if ( encoding == TIXML_ENCODING_UTF8 )
287				{
288					// Eat the 1 to 4 byte utf8 character.
289					int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
290					if ( step == 0 )
291						step = 1;		// Error case from bad encoding, but handle gracefully.
292					p += step;
293
294					// Just advance one column, of course.
295					++col;
296				}
297				else
298				{
299					++p;
300					++col;
301				}
302				break;
303		}
304	}
305	cursor.row = row;
306	cursor.col = col;
307	assert( cursor.row >= -1 );
308	assert( cursor.col >= -1 );
309	stamp = p;
310	assert( stamp );
311}
312
313
314const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
315{
316	if ( !p || !*p )
317	{
318		return 0;
319	}
320	if ( encoding == TIXML_ENCODING_UTF8 )
321	{
322		while ( *p )
323		{
324			const unsigned char* pU = (const unsigned char*)p;
325
326			// Skip the stupid Microsoft UTF-8 Byte order marks
327			if (	*(pU+0)==TIXML_UTF_LEAD_0
328				 && *(pU+1)==TIXML_UTF_LEAD_1
329				 && *(pU+2)==TIXML_UTF_LEAD_2 )
330			{
331				p += 3;
332				continue;
333			}
334			else if(*(pU+0)==TIXML_UTF_LEAD_0
335				 && *(pU+1)==0xbfU
336				 && *(pU+2)==0xbeU )
337			{
338				p += 3;
339				continue;
340			}
341			else if(*(pU+0)==TIXML_UTF_LEAD_0
342				 && *(pU+1)==0xbfU
343				 && *(pU+2)==0xbfU )
344			{
345				p += 3;
346				continue;
347			}
348
349			if ( IsWhiteSpace( *p ) )		// Still using old rules for white space.
350				++p;
351			else
352				break;
353		}
354	}
355	else
356	{
357		while ( *p && IsWhiteSpace( *p ) )
358			++p;
359	}
360
361	return p;
362}
363
364#ifdef TIXML_USE_STL
365/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
366{
367	for( ;; )
368	{
369		if ( !in->good() ) return false;
370
371		int c = in->peek();
372		// At this scope, we can't get to a document. So fail silently.
373		if ( !IsWhiteSpace( c ) || c <= 0 )
374			return true;
375
376		*tag += (char) in->get();
377	}
378}
379
380/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
381{
382	//assert( character > 0 && character < 128 );	// else it won't work in utf-8
383	while ( in->good() )
384	{
385		int c = in->peek();
386		if ( c == character )
387			return true;
388		if ( c <= 0 )		// Silent failure: can't get document at this scope
389			return false;
390
391		in->get();
392		*tag += (char) c;
393	}
394	return false;
395}
396#endif
397
398// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
399// "assign" optimization removes over 10% of the execution time.
400//
401const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
402{
403	// Oddly, not supported on some comilers,
404	//name->clear();
405	// So use this:
406	*name = "";
407	assert( p );
408
409	// Names start with letters or underscores.
410	// Of course, in unicode, tinyxml has no idea what a letter *is*. The
411	// algorithm is generous.
412	//
413	// After that, they can be letters, underscores, numbers,
414	// hyphens, or colons. (Colons are valid ony for namespaces,
415	// but tinyxml can't tell namespaces from names.)
416	if (    p && *p
417		 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
418	{
419		const char* start = p;
420		while(		p && *p
421				&&	(		IsAlphaNum( (unsigned char ) *p, encoding )
422						 || *p == '_'
423						 || *p == '-'
424						 || *p == '.'
425						 || *p == ':' ) )
426		{
427			//(*name) += *p; // expensive
428			++p;
429		}
430		if ( p-start > 0 ) {
431			name->assign( start, p-start );
432		}
433		return p;
434	}
435	return 0;
436}
437
438const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
439{
440	// Presume an entity, and pull it out.
441    TIXML_STRING ent;
442	int i;
443	*length = 0;
444
445	if ( *(p+1) && *(p+1) == '#' && *(p+2) )
446	{
447		unsigned long ucs = 0;
448		ptrdiff_t delta = 0;
449		unsigned mult = 1;
450
451		if ( *(p+2) == 'x' )
452		{
453			// Hexadecimal.
454			if ( !*(p+3) ) return 0;
455
456			const char* q = p+3;
457			q = strchr( q, ';' );
458
459			if ( !q || !*q ) return 0;
460
461			delta = q-p;
462			--q;
463
464			while ( *q != 'x' )
465			{
466				if ( *q >= '0' && *q <= '9' )
467					ucs += mult * (*q - '0');
468				else if ( *q >= 'a' && *q <= 'f' )
469					ucs += mult * (*q - 'a' + 10);
470				else if ( *q >= 'A' && *q <= 'F' )
471					ucs += mult * (*q - 'A' + 10 );
472				else
473					return 0;
474				mult *= 16;
475				--q;
476			}
477		}
478		else
479		{
480			// Decimal.
481			if ( !*(p+2) ) return 0;
482
483			const char* q = p+2;
484			q = strchr( q, ';' );
485
486			if ( !q || !*q ) return 0;
487
488			delta = q-p;
489			--q;
490
491			while ( *q != '#' )
492			{
493				if ( *q >= '0' && *q <= '9' )
494					ucs += mult * (*q - '0');
495				else
496					return 0;
497				mult *= 10;
498				--q;
499			}
500		}
501		if ( encoding == TIXML_ENCODING_UTF8 )
502		{
503			// convert the UCS to UTF-8
504			ConvertUTF32ToUTF8( ucs, value, length );
505		}
506		else
507		{
508			*value = (char)ucs;
509			*length = 1;
510		}
511		return p + delta + 1;
512	}
513
514	// Now try to match it.
515	for( i=0; i<NUM_ENTITY; ++i )
516	{
517		if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
518		{
519			assert( strlen( entity[i].str ) == entity[i].strLength );
520			*value = entity[i].chr;
521			*length = 1;
522			return ( p + entity[i].strLength );
523		}
524	}
525
526	// So it wasn't an entity, its unrecognized, or something like that.
527	*value = *p;	// Don't put back the last one, since we return it!
528	//*length = 1;	// Leave unrecognized entities - this doesn't really work.
529					// Just writes strange XML.
530	return p+1;
531}
532
533
534bool TiXmlBase::StringEqual( const char* p,
535							 const char* tag,
536							 bool ignoreCase,
537							 TiXmlEncoding encoding )
538{
539	assert( p );
540	assert( tag );
541	if ( !p || !*p )
542	{
543		assert( 0 );
544		return false;
545	}
546
547	const char* q = p;
548
549	if ( ignoreCase )
550	{
551		while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
552		{
553			++q;
554			++tag;
555		}
556
557		if ( *tag == 0 )
558			return true;
559	}
560	else
561	{
562		while ( *q && *tag && *q == *tag )
563		{
564			++q;
565			++tag;
566		}
567
568		if ( *tag == 0 )		// Have we found the end of the tag, and everything equal?
569			return true;
570	}
571	return false;
572}
573
574const char* TiXmlBase::ReadText(	const char* p,
575									TIXML_STRING * text,
576									bool trimWhiteSpace,
577									const char* endTag,
578									bool caseInsensitive,
579									TiXmlEncoding encoding )
580{
581    *text = "";
582	if (    !trimWhiteSpace			// certain tags always keep whitespace
583		 || !condenseWhiteSpace )	// if true, whitespace is always kept
584	{
585		// Keep all the white space.
586		while (	   p && *p
587				&& !StringEqual( p, endTag, caseInsensitive, encoding )
588			  )
589		{
590			int len;
591			char cArr[4] = { 0, 0, 0, 0 };
592			p = GetChar( p, cArr, &len, encoding );
593			text->append( cArr, len );
594		}
595	}
596	else
597	{
598		bool whitespace = false;
599
600		// Remove leading white space:
601		p = SkipWhiteSpace( p, encoding );
602		while (	   p && *p
603				&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
604		{
605			if ( *p == '\r' || *p == '\n' )
606			{
607				whitespace = true;
608				++p;
609			}
610			else if ( IsWhiteSpace( *p ) )
611			{
612				whitespace = true;
613				++p;
614			}
615			else
616			{
617				// If we've found whitespace, add it before the
618				// new character. Any whitespace just becomes a space.
619				if ( whitespace )
620				{
621					(*text) += ' ';
622					whitespace = false;
623				}
624				int len;
625				char cArr[4] = { 0, 0, 0, 0 };
626				p = GetChar( p, cArr, &len, encoding );
627				if ( len == 1 )
628					(*text) += cArr[0];	// more efficient
629				else
630					text->append( cArr, len );
631			}
632		}
633	}
634	if ( p && *p )
635		p += strlen( endTag );
636	return ( p && *p ) ? p : 0;
637}
638
639#ifdef TIXML_USE_STL
640
641void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
642{
643	// The basic issue with a document is that we don't know what we're
644	// streaming. Read something presumed to be a tag (and hope), then
645	// identify it, and call the appropriate stream method on the tag.
646	//
647	// This "pre-streaming" will never read the closing ">" so the
648	// sub-tag can orient itself.
649
650	if ( !StreamTo( in, '<', tag ) )
651	{
652		SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
653		return;
654	}
655
656	while ( in->good() )
657	{
658		int tagIndex = (int) tag->length();
659		while ( in->good() && in->peek() != '>' )
660		{
661			int c = in->get();
662			if ( c <= 0 )
663			{
664				SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
665				break;
666			}
667			(*tag) += (char) c;
668		}
669
670		if ( in->good() )
671		{
672			// We now have something we presume to be a node of
673			// some sort. Identify it, and call the node to
674			// continue streaming.
675			TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
676
677			if ( node )
678			{
679				node->StreamIn( in, tag );
680				bool isElement = node->ToElement() != 0;
681				delete node;
682				node = 0;
683
684				// If this is the root element, we're done. Parsing will be
685				// done by the >> operator.
686				if ( isElement )
687				{
688					return;
689				}
690			}
691			else
692			{
693				SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
694				return;
695			}
696		}
697	}
698	// We should have returned sooner.
699	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
700}
701
702#endif
703
704const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
705{
706	ClearError();
707
708	// Parse away, at the document level. Since a document
709	// contains nothing but other tags, most of what happens
710	// here is skipping white space.
711	if ( !p || !*p )
712	{
713		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
714		return 0;
715	}
716
717	// Note that, for a document, this needs to come
718	// before the while space skip, so that parsing
719	// starts from the pointer we are given.
720	location.Clear();
721	if ( prevData )
722	{
723		location.row = prevData->cursor.row;
724		location.col = prevData->cursor.col;
725	}
726	else
727	{
728		location.row = 0;
729		location.col = 0;
730	}
731	TiXmlParsingData data( p, TabSize(), location.row, location.col );
732	location = data.Cursor();
733
734	if ( encoding == TIXML_ENCODING_UNKNOWN )
735	{
736		// Check for the Microsoft UTF-8 lead bytes.
737		const unsigned char* pU = (const unsigned char*)p;
738		if (	*(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
739			 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
740			 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
741		{
742			encoding = TIXML_ENCODING_UTF8;
743			useMicrosoftBOM = true;
744		}
745	}
746
747    p = SkipWhiteSpace( p, encoding );
748	if ( !p )
749	{
750		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
751		return 0;
752	}
753
754	while ( p && *p )
755	{
756		TiXmlNode* node = Identify( p, encoding );
757		if ( node )
758		{
759			p = node->Parse( p, &data, encoding );
760			LinkEndChild( node );
761		}
762		else
763		{
764			break;
765		}
766
767		// Did we get encoding info?
768		if (    encoding == TIXML_ENCODING_UNKNOWN
769			 && node->ToDeclaration() )
770		{
771			TiXmlDeclaration* dec = node->ToDeclaration();
772			const char* enc = dec->Encoding();
773			assert( enc );
774
775			if ( *enc == 0 )
776				encoding = TIXML_ENCODING_UTF8;
777			else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
778				encoding = TIXML_ENCODING_UTF8;
779			else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
780				encoding = TIXML_ENCODING_UTF8;	// incorrect, but be nice
781			else
782				encoding = TIXML_ENCODING_LEGACY;
783		}
784
785		p = SkipWhiteSpace( p, encoding );
786	}
787
788	// Was this empty?
789	if ( !firstChild ) {
790		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
791		return 0;
792	}
793
794	// All is well.
795	return p;
796}
797
798void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
799{
800	// The first error in a chain is more accurate - don't set again!
801	if ( error )
802		return;
803
804	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
805	error   = true;
806	errorId = err;
807	errorDesc = errorString[ errorId ];
808
809	errorLocation.Clear();
810	if ( pError && data )
811	{
812		data->Stamp( pError, encoding );
813		errorLocation = data->Cursor();
814	}
815}
816
817
818TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
819{
820	TiXmlNode* returnNode = 0;
821
822	p = SkipWhiteSpace( p, encoding );
823	if( !p || !*p || *p != '<' )
824	{
825		return 0;
826	}
827
828	p = SkipWhiteSpace( p, encoding );
829
830	if ( !p || !*p )
831	{
832		return 0;
833	}
834
835	// What is this thing?
836	// - Elements start with a letter or underscore, but xml is reserved.
837	// - Comments: <!--
838	// - Decleration: <?xml
839	// - Everthing else is unknown to tinyxml.
840	//
841
842	const char* xmlHeader = { "<?xml" };
843	const char* commentHeader = { "<!--" };
844	const char* dtdHeader = { "<!" };
845	const char* cdataHeader = { "<![CDATA[" };
846
847	if ( StringEqual( p, xmlHeader, true, encoding ) )
848	{
849		#ifdef DEBUG_PARSER
850			TIXML_LOG( "XML parsing Declaration\n" );
851		#endif
852		returnNode = new TiXmlDeclaration();
853	}
854	else if ( StringEqual( p, commentHeader, false, encoding ) )
855	{
856		#ifdef DEBUG_PARSER
857			TIXML_LOG( "XML parsing Comment\n" );
858		#endif
859		returnNode = new TiXmlComment();
860	}
861	else if ( StringEqual( p, cdataHeader, false, encoding ) )
862	{
863		#ifdef DEBUG_PARSER
864			TIXML_LOG( "XML parsing CDATA\n" );
865		#endif
866		TiXmlText* text = new TiXmlText( "" );
867		text->SetCDATA( true );
868		returnNode = text;
869	}
870	else if ( StringEqual( p, dtdHeader, false, encoding ) )
871	{
872		#ifdef DEBUG_PARSER
873			TIXML_LOG( "XML parsing Unknown(1)\n" );
874		#endif
875		returnNode = new TiXmlUnknown();
876	}
877	else if (    IsAlpha( *(p+1), encoding )
878			  || *(p+1) == '_' )
879	{
880		#ifdef DEBUG_PARSER
881			TIXML_LOG( "XML parsing Element\n" );
882		#endif
883		returnNode = new TiXmlElement( "" );
884	}
885	else
886	{
887		#ifdef DEBUG_PARSER
888			TIXML_LOG( "XML parsing Unknown(2)\n" );
889		#endif
890		returnNode = new TiXmlUnknown();
891	}
892
893	if ( returnNode )
894	{
895		// Set the parent, so it can report errors
896		returnNode->parent = this;
897	}
898	return returnNode;
899}
900
901#ifdef TIXML_USE_STL
902
903void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
904{
905	// We're called with some amount of pre-parsing. That is, some of "this"
906	// element is in "tag". Go ahead and stream to the closing ">"
907	while( in->good() )
908	{
909		int c = in->get();
910		if ( c <= 0 )
911		{
912			TiXmlDocument* document = GetDocument();
913			if ( document )
914				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
915			return;
916		}
917		(*tag) += (char) c ;
918
919		if ( c == '>' )
920			break;
921	}
922
923	if ( tag->length() < 3 ) return;
924
925	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
926	// If not, identify and stream.
927
928	if (    tag->at( tag->length() - 1 ) == '>'
929		 && tag->at( tag->length() - 2 ) == '/' )
930	{
931		// All good!
932		return;
933	}
934	else if ( tag->at( tag->length() - 1 ) == '>' )
935	{
936		// There is more. Could be:
937		//		text
938		//		cdata text (which looks like another node)
939		//		closing tag
940		//		another node.
941		for ( ;; )
942		{
943			StreamWhiteSpace( in, tag );
944
945			// Do we have text?
946			if ( in->good() && in->peek() != '<' )
947			{
948				// Yep, text.
949				TiXmlText text( "" );
950				text.StreamIn( in, tag );
951
952				// What follows text is a closing tag or another node.
953				// Go around again and figure it out.
954				continue;
955			}
956
957			// We now have either a closing tag...or another node.
958			// We should be at a "<", regardless.
959			if ( !in->good() ) return;
960			assert( in->peek() == '<' );
961			int tagIndex = (int) tag->length();
962
963			bool closingTag = false;
964			bool firstCharFound = false;
965
966			for( ;; )
967			{
968				if ( !in->good() )
969					return;
970
971				int c = in->peek();
972				if ( c <= 0 )
973				{
974					TiXmlDocument* document = GetDocument();
975					if ( document )
976						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
977					return;
978				}
979
980				if ( c == '>' )
981					break;
982
983				*tag += (char) c;
984				in->get();
985
986				// Early out if we find the CDATA id.
987				if ( c == '[' && tag->size() >= 9 )
988				{
989					size_t len = tag->size();
990					const char* start = tag->c_str() + len - 9;
991					if ( strcmp( start, "<![CDATA[" ) == 0 ) {
992						assert( !closingTag );
993						break;
994					}
995				}
996
997				if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
998				{
999					firstCharFound = true;
1000					if ( c == '/' )
1001						closingTag = true;
1002				}
1003			}
1004			// If it was a closing tag, then read in the closing '>' to clean up the input stream.
1005			// If it was not, the streaming will be done by the tag.
1006			if ( closingTag )
1007			{
1008				if ( !in->good() )
1009					return;
1010
1011				int c = in->get();
1012				if ( c <= 0 )
1013				{
1014					TiXmlDocument* document = GetDocument();
1015					if ( document )
1016						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1017					return;
1018				}
1019				assert( c == '>' );
1020				*tag += (char) c;
1021
1022				// We are done, once we've found our closing tag.
1023				return;
1024			}
1025			else
1026			{
1027				// If not a closing tag, id it, and stream.
1028				const char* tagloc = tag->c_str() + tagIndex;
1029				TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1030				if ( !node )
1031					return;
1032				node->StreamIn( in, tag );
1033				delete node;
1034				node = 0;
1035
1036				// No return: go around from the beginning: text, closing tag, or node.
1037			}
1038		}
1039	}
1040}
1041#endif
1042
1043const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1044{
1045	p = SkipWhiteSpace( p, encoding );
1046	TiXmlDocument* document = GetDocument();
1047
1048	if ( !p || !*p )
1049	{
1050		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1051		return 0;
1052	}
1053
1054	if ( data )
1055	{
1056		data->Stamp( p, encoding );
1057		location = data->Cursor();
1058	}
1059
1060	if ( *p != '<' )
1061	{
1062		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1063		return 0;
1064	}
1065
1066	p = SkipWhiteSpace( p+1, encoding );
1067
1068	// Read the name.
1069	const char* pErr = p;
1070
1071    p = ReadName( p, &value, encoding );
1072	if ( !p || !*p )
1073	{
1074		if ( document )	document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1075		return 0;
1076	}
1077
1078    TIXML_STRING endTag ("</");
1079	endTag += value;
1080
1081	// Check for and read attributes. Also look for an empty
1082	// tag or an end tag.
1083	while ( p && *p )
1084	{
1085		pErr = p;
1086		p = SkipWhiteSpace( p, encoding );
1087		if ( !p || !*p )
1088		{
1089			if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1090			return 0;
1091		}
1092		if ( *p == '/' )
1093		{
1094			++p;
1095			// Empty tag.
1096			if ( *p  != '>' )
1097			{
1098				if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1099				return 0;
1100			}
1101			return (p+1);
1102		}
1103		else if ( *p == '>' )
1104		{
1105			// Done with attributes (if there were any.)
1106			// Read the value -- which can include other
1107			// elements -- read the end tag, and return.
1108			++p;
1109			p = ReadValue( p, data, encoding );		// Note this is an Element method, and will set the error if one happens.
1110			if ( !p || !*p ) {
1111				// We were looking for the end tag, but found nothing.
1112				// Fix for [ 1663758 ] Failure to report error on bad XML
1113				if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1114				return 0;
1115			}
1116
1117			// We should find the end tag now
1118			// note that:
1119			// </foo > and
1120			// </foo>
1121			// are both valid end tags.
1122			if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1123			{
1124				p += endTag.length();
1125				p = SkipWhiteSpace( p, encoding );
1126				if ( p && *p && *p == '>' ) {
1127					++p;
1128					return p;
1129				}
1130				if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1131				return 0;
1132			}
1133			else
1134			{
1135				if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1136				return 0;
1137			}
1138		}
1139		else
1140		{
1141			// Try to read an attribute:
1142			TiXmlAttribute* attrib = new TiXmlAttribute();
1143			if ( !attrib )
1144			{
1145				return 0;
1146			}
1147
1148			attrib->SetDocument( document );
1149			pErr = p;
1150			p = attrib->Parse( p, data, encoding );
1151
1152			if ( !p || !*p )
1153			{
1154				if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1155				delete attrib;
1156				return 0;
1157			}
1158
1159			// Handle the strange case of double attributes:
1160			#ifdef TIXML_USE_STL
1161			TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1162			#else
1163			TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1164			#endif
1165			if ( node )
1166			{
1167				if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1168				delete attrib;
1169				return 0;
1170			}
1171
1172			attributeSet.Add( attrib );
1173		}
1174	}
1175	return p;
1176}
1177
1178
1179const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1180{
1181	TiXmlDocument* document = GetDocument();
1182
1183	// Read in text and elements in any order.
1184	const char* pWithWhiteSpace = p;
1185	p = SkipWhiteSpace( p, encoding );
1186
1187	while ( p && *p )
1188	{
1189		if ( *p != '<' )
1190		{
1191			// Take what we have, make a text element.
1192			TiXmlText* textNode = new TiXmlText( "" );
1193
1194			if ( !textNode )
1195			{
1196			    return 0;
1197			}
1198
1199			if ( TiXmlBase::IsWhiteSpaceCondensed() )
1200			{
1201				p = textNode->Parse( p, data, encoding );
1202			}
1203			else
1204			{
1205				// Special case: we want to keep the white space
1206				// so that leading spaces aren't removed.
1207				p = textNode->Parse( pWithWhiteSpace, data, encoding );
1208			}
1209
1210			if ( !textNode->Blank() )
1211				LinkEndChild( textNode );
1212			else
1213				delete textNode;
1214		}
1215		else
1216		{
1217			// We hit a '<'
1218			// Have we hit a new element or an end tag? This could also be
1219			// a TiXmlText in the "CDATA" style.
1220			if ( StringEqual( p, "</", false, encoding ) )
1221			{
1222				return p;
1223			}
1224			else
1225			{
1226				TiXmlNode* node = Identify( p, encoding );
1227				if ( node )
1228				{
1229					p = node->Parse( p, data, encoding );
1230					LinkEndChild( node );
1231				}
1232				else
1233				{
1234					return 0;
1235				}
1236			}
1237		}
1238		pWithWhiteSpace = p;
1239		p = SkipWhiteSpace( p, encoding );
1240	}
1241
1242	if ( !p )
1243	{
1244		if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1245	}
1246	return p;
1247}
1248
1249
1250#ifdef TIXML_USE_STL
1251void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1252{
1253	while ( in->good() )
1254	{
1255		int c = in->get();
1256		if ( c <= 0 )
1257		{
1258			TiXmlDocument* document = GetDocument();
1259			if ( document )
1260				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1261			return;
1262		}
1263		(*tag) += (char) c;
1264
1265		if ( c == '>' )
1266		{
1267			// All is well.
1268			return;
1269		}
1270	}
1271}
1272#endif
1273
1274
1275const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1276{
1277	TiXmlDocument* document = GetDocument();
1278	p = SkipWhiteSpace( p, encoding );
1279
1280	if ( data )
1281	{
1282		data->Stamp( p, encoding );
1283		location = data->Cursor();
1284	}
1285	if ( !p || !*p || *p != '<' )
1286	{
1287		if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1288		return 0;
1289	}
1290	++p;
1291    value = "";
1292
1293	while ( p && *p && *p != '>' )
1294	{
1295		value += *p;
1296		++p;
1297	}
1298
1299	if ( !p )
1300	{
1301		if ( document )
1302			document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1303	}
1304	if ( p && *p == '>' )
1305		return p+1;
1306	return p;
1307}
1308
1309#ifdef TIXML_USE_STL
1310void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1311{
1312	while ( in->good() )
1313	{
1314		int c = in->get();
1315		if ( c <= 0 )
1316		{
1317			TiXmlDocument* document = GetDocument();
1318			if ( document )
1319				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1320			return;
1321		}
1322
1323		(*tag) += (char) c;
1324
1325		if ( c == '>'
1326			 && tag->at( tag->length() - 2 ) == '-'
1327			 && tag->at( tag->length() - 3 ) == '-' )
1328		{
1329			// All is well.
1330			return;
1331		}
1332	}
1333}
1334#endif
1335
1336
1337const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1338{
1339	TiXmlDocument* document = GetDocument();
1340	value = "";
1341
1342	p = SkipWhiteSpace( p, encoding );
1343
1344	if ( data )
1345	{
1346		data->Stamp( p, encoding );
1347		location = data->Cursor();
1348	}
1349	const char* startTag = "<!--";
1350	const char* endTag   = "-->";
1351
1352	if ( !StringEqual( p, startTag, false, encoding ) )
1353	{
1354		if ( document )
1355			document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1356		return 0;
1357	}
1358	p += strlen( startTag );
1359
1360	// [ 1475201 ] TinyXML parses entities in comments
1361	// Oops - ReadText doesn't work, because we don't want to parse the entities.
1362	// p = ReadText( p, &value, false, endTag, false, encoding );
1363	//
1364	// from the XML spec:
1365	/*
1366	 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
1367	              they may appear within the document type declaration at places allowed by the grammar.
1368				  They are not part of the document's character data; an XML processor MAY, but need not,
1369				  make it possible for an application to retrieve the text of comments. For compatibility,
1370				  the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
1371				  references MUST NOT be recognized within comments.
1372
1373				  An example of a comment:
1374
1375				  <!-- declarations for <head> & <body> -->
1376	*/
1377
1378    value = "";
1379	// Keep all the white space.
1380	while (	p && *p && !StringEqual( p, endTag, false, encoding ) )
1381	{
1382		value.append( p, 1 );
1383		++p;
1384	}
1385	if ( p && *p )
1386		p += strlen( endTag );
1387
1388	return p;
1389}
1390
1391
1392const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1393{
1394	p = SkipWhiteSpace( p, encoding );
1395	if ( !p || !*p ) return 0;
1396
1397	if ( data )
1398	{
1399		data->Stamp( p, encoding );
1400		location = data->Cursor();
1401	}
1402	// Read the name, the '=' and the value.
1403	const char* pErr = p;
1404	p = ReadName( p, &name, encoding );
1405	if ( !p || !*p )
1406	{
1407		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1408		return 0;
1409	}
1410	p = SkipWhiteSpace( p, encoding );
1411	if ( !p || !*p || *p != '=' )
1412	{
1413		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1414		return 0;
1415	}
1416
1417	++p;	// skip '='
1418	p = SkipWhiteSpace( p, encoding );
1419	if ( !p || !*p )
1420	{
1421		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1422		return 0;
1423	}
1424
1425	const char* end;
1426	const char SINGLE_QUOTE = '\'';
1427	const char DOUBLE_QUOTE = '\"';
1428
1429	if ( *p == SINGLE_QUOTE )
1430	{
1431		++p;
1432		end = "\'";		// single quote in string
1433		p = ReadText( p, &value, false, end, false, encoding );
1434	}
1435	else if ( *p == DOUBLE_QUOTE )
1436	{
1437		++p;
1438		end = "\"";		// double quote in string
1439		p = ReadText( p, &value, false, end, false, encoding );
1440	}
1441	else
1442	{
1443		// All attribute values should be in single or double quotes.
1444		// But this is such a common error that the parser will try
1445		// its best, even without them.
1446		value = "";
1447		while (    p && *p											// existence
1448				&& !IsWhiteSpace( *p )								// whitespace
1449				&& *p != '/' && *p != '>' )							// tag end
1450		{
1451			if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1452				// [ 1451649 ] Attribute values with trailing quotes not handled correctly
1453				// We did not have an opening quote but seem to have a
1454				// closing one. Give up and throw an error.
1455				if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1456				return 0;
1457			}
1458			value += *p;
1459			++p;
1460		}
1461	}
1462	return p;
1463}
1464
1465#ifdef TIXML_USE_STL
1466void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1467{
1468	while ( in->good() )
1469	{
1470		int c = in->peek();
1471		if ( !cdata && (c == '<' ) )
1472		{
1473			return;
1474		}
1475		if ( c <= 0 )
1476		{
1477			TiXmlDocument* document = GetDocument();
1478			if ( document )
1479				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1480			return;
1481		}
1482
1483		(*tag) += (char) c;
1484		in->get();	// "commits" the peek made above
1485
1486		if ( cdata && c == '>' && tag->size() >= 3 ) {
1487			size_t len = tag->size();
1488			if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1489				// terminator of cdata.
1490				return;
1491			}
1492		}
1493	}
1494}
1495#endif
1496
1497const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1498{
1499	value = "";
1500	TiXmlDocument* document = GetDocument();
1501
1502	if ( data )
1503	{
1504		data->Stamp( p, encoding );
1505		location = data->Cursor();
1506	}
1507
1508	const char* const startTag = "<![CDATA[";
1509	const char* const endTag   = "]]>";
1510
1511	if ( cdata || StringEqual( p, startTag, false, encoding ) )
1512	{
1513		cdata = true;
1514
1515		if ( !StringEqual( p, startTag, false, encoding ) )
1516		{
1517			if ( document )
1518				document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1519			return 0;
1520		}
1521		p += strlen( startTag );
1522
1523		// Keep all the white space, ignore the encoding, etc.
1524		while (	   p && *p
1525				&& !StringEqual( p, endTag, false, encoding )
1526			  )
1527		{
1528			value += *p;
1529			++p;
1530		}
1531
1532		TIXML_STRING dummy;
1533		p = ReadText( p, &dummy, false, endTag, false, encoding );
1534		return p;
1535	}
1536	else
1537	{
1538		bool ignoreWhite = true;
1539
1540		const char* end = "<";
1541		p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1542		if ( p && *p )
1543			return p-1;	// don't truncate the '<'
1544		return 0;
1545	}
1546}
1547
1548#ifdef TIXML_USE_STL
1549void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1550{
1551	while ( in->good() )
1552	{
1553		int c = in->get();
1554		if ( c <= 0 )
1555		{
1556			TiXmlDocument* document = GetDocument();
1557			if ( document )
1558				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1559			return;
1560		}
1561		(*tag) += (char) c;
1562
1563		if ( c == '>' )
1564		{
1565			// All is well.
1566			return;
1567		}
1568	}
1569}
1570#endif
1571
1572const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1573{
1574	p = SkipWhiteSpace( p, _encoding );
1575	// Find the beginning, find the end, and look for
1576	// the stuff in-between.
1577	TiXmlDocument* document = GetDocument();
1578	if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1579	{
1580		if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1581		return 0;
1582	}
1583	if ( data )
1584	{
1585		data->Stamp( p, _encoding );
1586		location = data->Cursor();
1587	}
1588	p += 5;
1589
1590	version = "";
1591	encoding = "";
1592	standalone = "";
1593
1594	while ( p && *p )
1595	{
1596		if ( *p == '>' )
1597		{
1598			++p;
1599			return p;
1600		}
1601
1602		p = SkipWhiteSpace( p, _encoding );
1603		if ( StringEqual( p, "version", true, _encoding ) )
1604		{
1605			TiXmlAttribute attrib;
1606			p = attrib.Parse( p, data, _encoding );
1607			version = attrib.Value();
1608		}
1609		else if ( StringEqual( p, "encoding", true, _encoding ) )
1610		{
1611			TiXmlAttribute attrib;
1612			p = attrib.Parse( p, data, _encoding );
1613			encoding = attrib.Value();
1614		}
1615		else if ( StringEqual( p, "standalone", true, _encoding ) )
1616		{
1617			TiXmlAttribute attrib;
1618			p = attrib.Parse( p, data, _encoding );
1619			standalone = attrib.Value();
1620		}
1621		else
1622		{
1623			// Read over whatever it is.
1624			while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1625				++p;
1626		}
1627	}
1628	return 0;
1629}
1630
1631bool TiXmlText::Blank() const
1632{
1633	for ( unsigned i=0; i<value.length(); i++ )
1634		if ( !IsWhiteSpace( value[i] ) )
1635			return false;
1636	return true;
1637}
1638
1639