1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "../../include/fxcrt/fx_xml.h"
8#include "xml_int.h"
9CXML_Parser::~CXML_Parser()
10{
11    if (m_bOwnedStream) {
12        m_pDataAcc->Release();
13    }
14}
15FX_BOOL CXML_Parser::Init(FX_LPBYTE pBuffer, size_t size)
16{
17    if (m_pAllocator) {
18        m_pDataAcc = FX_NewAtAllocator(m_pAllocator)CXML_DataBufAcc(pBuffer, size, m_pAllocator);
19    } else {
20        m_pDataAcc = FX_NEW CXML_DataBufAcc(pBuffer, size, NULL);
21    }
22    if (!m_pDataAcc) {
23        return FALSE;
24    }
25    return Init(TRUE);
26}
27FX_BOOL CXML_Parser::Init(IFX_FileRead *pFileRead)
28{
29    if (m_pAllocator) {
30        m_pDataAcc = FX_NewAtAllocator(m_pAllocator)CXML_DataStmAcc(pFileRead, m_pAllocator);
31    } else {
32        m_pDataAcc = FX_NEW CXML_DataStmAcc(pFileRead, NULL);
33    }
34    if (!m_pDataAcc) {
35        return FALSE;
36    }
37    return Init(TRUE);
38}
39FX_BOOL CXML_Parser::Init(IFX_BufferRead *pBuffer)
40{
41    if (!pBuffer) {
42        return FALSE;
43    }
44    m_pDataAcc = pBuffer;
45    return Init(FALSE);
46}
47FX_BOOL CXML_Parser::Init(FX_BOOL bOwndedStream)
48{
49    m_bOwnedStream = bOwndedStream;
50    m_nOffset = 0;
51    return ReadNextBlock();
52}
53FX_BOOL CXML_Parser::ReadNextBlock()
54{
55    if (!m_pDataAcc->ReadNextBlock()) {
56        return FALSE;
57    }
58    m_pBuffer = m_pDataAcc->GetBlockBuffer();
59    m_dwBufferSize = m_pDataAcc->GetBlockSize();
60    m_nBufferOffset = m_pDataAcc->GetBlockOffset();
61    m_dwIndex = 0;
62    return m_dwBufferSize > 0;
63}
64FX_BOOL CXML_Parser::IsEOF()
65{
66    if (!m_pDataAcc->IsEOF()) {
67        return FALSE;
68    }
69    return m_dwIndex >= m_dwBufferSize;
70}
71#define FXCRTM_XML_CHARTYPE_Normal			0x00
72#define FXCRTM_XML_CHARTYPE_SpaceChar		0x01
73#define FXCRTM_XML_CHARTYPE_Letter			0x02
74#define FXCRTM_XML_CHARTYPE_Digital			0x04
75#define FXCRTM_XML_CHARTYPE_NameIntro		0x08
76#define FXCRTM_XML_CHARTYPE_NameChar		0x10
77#define FXCRTM_XML_CHARTYPE_HexDigital		0x20
78#define FXCRTM_XML_CHARTYPE_HexLowerLetter	0x40
79#define FXCRTM_XML_CHARTYPE_HexUpperLetter	0x60
80#define FXCRTM_XML_CHARTYPE_HexChar			0x60
81FX_BYTE g_FXCRT_XML_ByteTypes[256] = {
82    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
83    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
84    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00,
85    0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
86    0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
87    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18,
88    0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
89    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00,
90    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
91    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
92    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
93    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
94    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
95    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
96    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
97    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x01, 0x01,
98};
99FX_BOOL g_FXCRT_XML_IsWhiteSpace(FX_BYTE ch)
100{
101    return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar) != 0;
102}
103FX_BOOL g_FXCRT_XML_IsLetter(FX_BYTE ch)
104{
105    return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Letter) != 0;
106}
107FX_BOOL g_FXCRT_XML_IsDigital(FX_BYTE ch)
108{
109    return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital) != 0;
110}
111FX_BOOL g_FXCRT_XML_IsNameIntro(FX_BYTE ch)
112{
113    return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro) != 0;
114}
115FX_BOOL g_FXCRT_XML_IsNameChar(FX_BYTE ch)
116{
117    return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar) != 0;
118}
119FX_BOOL g_FXCRT_XML_IsHexChar(FX_BYTE ch)
120{
121    return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar) != 0;
122}
123void CXML_Parser::SkipWhiteSpaces()
124{
125    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
126    if (IsEOF()) {
127        return;
128    }
129    do {
130        while (m_dwIndex < m_dwBufferSize && g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) {
131            m_dwIndex ++;
132        }
133        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
134        if (m_dwIndex < m_dwBufferSize || IsEOF()) {
135            break;
136        }
137    } while (ReadNextBlock());
138}
139void CXML_Parser::GetName(CFX_ByteStringL &space, CFX_ByteStringL &name)
140{
141    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
142    if (IsEOF()) {
143        return;
144    }
145    CFX_ByteTextBuf buf(m_pAllocator);
146    FX_BYTE ch;
147    do {
148        while (m_dwIndex < m_dwBufferSize) {
149            ch = m_pBuffer[m_dwIndex];
150            if (ch == ':') {
151                buf.GetByteStringL(space);
152                buf.Clear();
153            } else if (g_FXCRT_XML_IsNameChar(ch)) {
154                buf.AppendChar(ch);
155            } else {
156                break;
157            }
158            m_dwIndex ++;
159        }
160        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
161        if (m_dwIndex < m_dwBufferSize || IsEOF()) {
162            break;
163        }
164    } while (ReadNextBlock());
165    buf.GetByteStringL(name);
166}
167void CXML_Parser::SkipLiterals(FX_BSTR str)
168{
169    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
170    if (IsEOF()) {
171        return;
172    }
173    FX_INT32 i = 0, iLen = str.GetLength();
174    do {
175        while (m_dwIndex < m_dwBufferSize) {
176            if (str.GetAt(i) != m_pBuffer[m_dwIndex ++]) {
177                i = 0;
178            } else {
179                i ++;
180                if (i == iLen) {
181                    break;
182                }
183            }
184        }
185        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
186        if (i == iLen) {
187            return;
188        }
189        if (m_dwIndex < m_dwBufferSize || IsEOF()) {
190            break;
191        }
192    } while (ReadNextBlock());
193    while (!m_pDataAcc->IsEOF()) {
194        ReadNextBlock();
195        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwBufferSize;
196    }
197    m_dwIndex = m_dwBufferSize;
198}
199FX_DWORD CXML_Parser::GetCharRef()
200{
201    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
202    if (IsEOF()) {
203        return 0;
204    }
205    FX_BYTE ch;
206    FX_INT32 iState = 0;
207    CFX_ByteTextBuf buf(m_pAllocator);
208    FX_DWORD code = 0;
209    do {
210        while (m_dwIndex < m_dwBufferSize) {
211            ch = m_pBuffer[m_dwIndex];
212            switch (iState) {
213                case 0:
214                    if (ch == '#') {
215                        m_dwIndex ++;
216                        iState = 2;
217                        break;
218                    }
219                    iState = 1;
220                case 1:
221                    m_dwIndex ++;
222                    if (ch == ';') {
223                        CFX_ByteStringC ref = buf.GetByteString();
224                        if (ref == FX_BSTRC("gt")) {
225                            code = '>';
226                        } else if (ref == FX_BSTRC("lt")) {
227                            code = '<';
228                        } else if (ref == FX_BSTRC("amp")) {
229                            code = '&';
230                        } else if (ref == FX_BSTRC("apos")) {
231                            code = '\'';
232                        } else if (ref == FX_BSTRC("quot")) {
233                            code = '"';
234                        }
235                        iState = 10;
236                        break;
237                    }
238                    buf.AppendByte(ch);
239                    break;
240                case 2:
241                    if (ch == 'x') {
242                        m_dwIndex ++;
243                        iState = 4;
244                        break;
245                    }
246                    iState = 3;
247                case 3:
248                    m_dwIndex ++;
249                    if (ch == ';') {
250                        iState = 10;
251                        break;
252                    }
253                    if (g_FXCRT_XML_IsDigital(ch)) {
254                        code = code * 10 + ch - '0';
255                    }
256                    break;
257                case 4:
258                    m_dwIndex ++;
259                    if (ch == ';') {
260                        iState = 10;
261                        break;
262                    }
263                    FX_BYTE nHex = g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar;
264                    if (nHex) {
265                        if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) {
266                            code = (code << 4) + ch - '0';
267                        } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) {
268                            code = (code << 4) + ch - 87;
269                        } else {
270                            code = (code << 4) + ch - 55;
271                        }
272                    }
273                    break;
274            }
275            if (iState == 10) {
276                break;
277            }
278        }
279        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
280        if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
281            break;
282        }
283    } while (ReadNextBlock());
284    return code;
285}
286void CXML_Parser::GetAttrValue(CFX_WideStringL &value)
287{
288    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
289    if (IsEOF()) {
290        return;
291    }
292    CFX_UTF8Decoder decoder(m_pAllocator);
293    FX_BYTE mark = 0, ch;
294    do {
295        while (m_dwIndex < m_dwBufferSize) {
296            ch = m_pBuffer[m_dwIndex];
297            if (mark == 0) {
298                if (ch != '\'' && ch != '"') {
299                    return;
300                }
301                mark = ch;
302                m_dwIndex ++;
303                ch = 0;
304                continue;
305            }
306            m_dwIndex ++;
307            if (ch == mark) {
308                break;
309            }
310            if (ch == '&') {
311                decoder.AppendChar(GetCharRef());
312                if (IsEOF()) {
313                    decoder.GetResult(value);
314                    return;
315                }
316            } else {
317                decoder.Input(ch);
318            }
319        }
320        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
321        if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) {
322            break;
323        }
324    } while (ReadNextBlock());
325    decoder.GetResult(value);
326}
327void CXML_Parser::GetTagName(CFX_ByteStringL &space, CFX_ByteStringL &name, FX_BOOL &bEndTag, FX_BOOL bStartTag)
328{
329    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
330    if (IsEOF()) {
331        return;
332    }
333    bEndTag = FALSE;
334    FX_BYTE ch;
335    FX_INT32 iState = bStartTag ? 1 : 0;
336    do {
337        while (m_dwIndex < m_dwBufferSize) {
338            ch = m_pBuffer[m_dwIndex];
339            switch (iState) {
340                case 0:
341                    m_dwIndex ++;
342                    if (ch != '<') {
343                        break;
344                    }
345                    iState = 1;
346                    break;
347                case 1:
348                    if (ch == '?') {
349                        m_dwIndex ++;
350                        SkipLiterals(FX_BSTRC("?>"));
351                        iState = 0;
352                        break;
353                    } else if (ch == '!') {
354                        m_dwIndex ++;
355                        SkipLiterals(FX_BSTRC("-->"));
356                        iState = 0;
357                        break;
358                    }
359                    if (ch == '/') {
360                        m_dwIndex ++;
361                        GetName(space, name);
362                        bEndTag = TRUE;
363                    } else {
364                        GetName(space, name);
365                        bEndTag = FALSE;
366                    }
367                    return;
368            }
369        }
370        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
371        if (m_dwIndex < m_dwBufferSize || IsEOF()) {
372            break;
373        }
374    } while (ReadNextBlock());
375}
376CXML_Element* CXML_Parser::ParseElement(CXML_Element* pParent, FX_BOOL bStartTag)
377{
378    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
379    if (IsEOF()) {
380        return NULL;
381    }
382    CFX_ByteStringL tag_name, tag_space;
383    FX_BOOL bEndTag;
384    GetTagName(tag_space, tag_name, bEndTag, bStartTag);
385    if (tag_name.IsEmpty() || bEndTag) {
386        tag_space.Empty(m_pAllocator);
387        return NULL;
388    }
389    CXML_Element* pElement;
390    if (m_pAllocator) {
391        pElement = FX_NewAtAllocator(m_pAllocator)CXML_Element(m_pAllocator);
392    } else {
393        pElement = FX_NEW CXML_Element;
394    }
395    if (pElement) {
396        pElement->m_pParent = pParent;
397        pElement->SetTag(tag_space, tag_name);
398    }
399    tag_space.Empty(m_pAllocator);
400    tag_name.Empty(m_pAllocator);
401    if (!pElement) {
402        return NULL;
403    }
404    do {
405        CFX_ByteStringL attr_space, attr_name;
406        while (m_dwIndex < m_dwBufferSize) {
407            SkipWhiteSpaces();
408            if (IsEOF()) {
409                break;
410            }
411            if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) {
412                break;
413            }
414            attr_space.Empty(m_pAllocator);
415            attr_name.Empty(m_pAllocator);
416            GetName(attr_space, attr_name);
417            SkipWhiteSpaces();
418            if (IsEOF()) {
419                break;
420            }
421            if (m_pBuffer[m_dwIndex] != '=') {
422                break;
423            }
424            m_dwIndex ++;
425            SkipWhiteSpaces();
426            if (IsEOF()) {
427                break;
428            }
429            CFX_WideStringL attr_value;
430            GetAttrValue(attr_value);
431            pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value, m_pAllocator);
432            attr_value.Empty(m_pAllocator);
433        }
434        attr_space.Empty(m_pAllocator);
435        attr_name.Empty(m_pAllocator);
436        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
437        if (m_dwIndex < m_dwBufferSize || IsEOF()) {
438            break;
439        }
440    } while (ReadNextBlock());
441    SkipWhiteSpaces();
442    if (IsEOF()) {
443        return pElement;
444    }
445    FX_BYTE ch = m_pBuffer[m_dwIndex ++];
446    if (ch == '/') {
447        m_dwIndex ++;
448        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
449        return pElement;
450    }
451    if (ch != '>') {
452        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
453        if (m_pAllocator) {
454            FX_DeleteAtAllocator(pElement, m_pAllocator, CXML_Element);
455        } else {
456            delete pElement;
457        }
458        return NULL;
459    }
460    SkipWhiteSpaces();
461    if (IsEOF()) {
462        return pElement;
463    }
464    CFX_UTF8Decoder decoder(m_pAllocator);
465    CFX_WideTextBuf content(m_pAllocator);
466    FX_BOOL bCDATA = FALSE;
467    FX_INT32 iState = 0;
468    do {
469        while (m_dwIndex < m_dwBufferSize) {
470            ch = m_pBuffer[m_dwIndex ++];
471            switch (iState) {
472                case 0:
473                    if (ch == '<') {
474                        iState = 1;
475                    } else if (ch == '&') {
476                        decoder.ClearStatus();
477                        decoder.AppendChar(GetCharRef());
478                    } else {
479                        decoder.Input(ch);
480                    }
481                    break;
482                case 1:
483                    if (ch == '!') {
484                        iState = 2;
485                    } else if (ch == '?') {
486                        SkipLiterals(FX_BSTRC("?>"));
487                        SkipWhiteSpaces();
488                        iState = 0;
489                    } else if (ch == '/') {
490                        CFX_ByteStringL space, name;
491                        GetName(space, name);
492                        space.Empty(m_pAllocator);
493                        name.Empty(m_pAllocator);
494                        SkipWhiteSpaces();
495                        m_dwIndex ++;
496                        iState = 10;
497                    } else {
498                        content << decoder.GetResult();
499                        CFX_WideStringL dataStr;
500                        content.GetWideStringL(dataStr);
501                        if (!bCDATA && !m_bSaveSpaceChars) {
502                            dataStr.TrimRight((FX_LPCWSTR)L" \t\r\n");
503                        }
504                        InsertContentSegment(bCDATA, dataStr, pElement);
505                        dataStr.Empty(m_pAllocator);
506                        content.Clear();
507                        decoder.Clear();
508                        bCDATA = FALSE;
509                        iState = 0;
510                        m_dwIndex --;
511                        CXML_Element* pSubElement = ParseElement(pElement, TRUE);
512                        if (pSubElement == NULL) {
513                            break;
514                        }
515                        pSubElement->m_pParent = pElement;
516                        pElement->m_Children.Add((FX_LPVOID)CXML_Element::Element);
517                        pElement->m_Children.Add(pSubElement);
518                        SkipWhiteSpaces();
519                    }
520                    break;
521                case 2:
522                    if (ch == '[') {
523                        SkipLiterals(FX_BSTRC("]]>"));
524                    } else if (ch == '-') {
525                        m_dwIndex ++;
526                        SkipLiterals(FX_BSTRC("-->"));
527                    } else {
528                        SkipLiterals(FX_BSTRC(">"));
529                    }
530                    decoder.Clear();
531                    SkipWhiteSpaces();
532                    iState = 0;
533                    break;
534            }
535            if (iState == 10) {
536                break;
537            }
538        }
539        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
540        if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
541            break;
542        }
543    } while (ReadNextBlock());
544    content << decoder.GetResult();
545    CFX_WideStringL dataStr;
546    content.GetWideStringL(dataStr);
547    if (!m_bSaveSpaceChars) {
548        dataStr.TrimRight((FX_LPCWSTR)L" \t\r\n");
549    }
550    InsertContentSegment(bCDATA, dataStr, pElement);
551    dataStr.Empty(m_pAllocator);
552    content.Clear();
553    decoder.Clear();
554    bCDATA = FALSE;
555    return pElement;
556}
557void CXML_Parser::InsertContentSegment(FX_BOOL bCDATA, FX_WSTR content, CXML_Element* pElement)
558{
559    if (content.IsEmpty()) {
560        return;
561    }
562    CXML_Content* pContent;
563    if (m_pAllocator) {
564        pContent = FX_NewAtAllocator(m_pAllocator)CXML_Content;
565    } else {
566        pContent = FX_NEW CXML_Content;
567    }
568    if (!pContent) {
569        return;
570    }
571    pContent->Set(bCDATA, content, m_pAllocator);
572    pElement->m_Children.Add((FX_LPVOID)CXML_Element::Content);
573    pElement->m_Children.Add(pContent);
574}
575static CXML_Element* XML_ContinueParse(CXML_Parser &parser, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize)
576{
577    parser.m_bSaveSpaceChars = bSaveSpaceChars;
578    CXML_Element* pElement = parser.ParseElement(NULL, FALSE);
579    if (pParsedSize) {
580        *pParsedSize = parser.m_nOffset;
581    }
582    return pElement;
583}
584CXML_Element* CXML_Element::Parse(const void* pBuffer, size_t size, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator)
585{
586    CXML_Parser parser(pAllocator);
587    if (!parser.Init((FX_LPBYTE)pBuffer, size)) {
588        return NULL;
589    }
590    return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
591}
592CXML_Element* CXML_Element::Parse(IFX_FileRead *pFile, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator)
593{
594    CXML_Parser parser(pAllocator);
595    if (!parser.Init(pFile)) {
596        return NULL;
597    }
598    return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
599}
600CXML_Element* CXML_Element::Parse(IFX_BufferRead *pBuffer, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator)
601{
602    CXML_Parser parser(pAllocator);
603    if (!parser.Init(pBuffer)) {
604        return NULL;
605    }
606    return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
607}
608CXML_Element::CXML_Element(IFX_Allocator* pAllocator)
609    : m_pParent(NULL)
610    , m_QSpaceName()
611    , m_TagName()
612    , m_AttrMap()
613    , m_Children(pAllocator)
614{
615}
616CXML_Element::CXML_Element(FX_BSTR qSpace, FX_BSTR tagName, IFX_Allocator* pAllocator)
617    : m_pParent(NULL)
618    , m_QSpaceName()
619    , m_TagName()
620    , m_AttrMap()
621    , m_Children(pAllocator)
622{
623    m_QSpaceName.Set(qSpace, pAllocator);
624    m_TagName.Set(tagName, pAllocator);
625}
626CXML_Element::CXML_Element(FX_BSTR qTagName, IFX_Allocator* pAllocator)
627    : m_pParent(NULL)
628    , m_QSpaceName()
629    , m_TagName()
630    , m_AttrMap()
631    , m_Children(pAllocator)
632{
633    SetTag(qTagName);
634}
635CXML_Element::~CXML_Element()
636{
637    Empty();
638}
639void CXML_Element::Empty()
640{
641    IFX_Allocator* pAllocator = m_Children.m_pAllocator;
642    m_QSpaceName.Empty(pAllocator);
643    m_TagName.Empty(pAllocator);
644    m_AttrMap.RemoveAll(pAllocator);
645    RemoveChildren();
646}
647void CXML_Element::RemoveChildren()
648{
649    IFX_Allocator* pAllocator = m_Children.m_pAllocator;
650    for (int i = 0; i < m_Children.GetSize(); i += 2) {
651        ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i);
652        if (type == Content) {
653            CXML_Content* content = (CXML_Content*)m_Children.GetAt(i + 1);
654            if (pAllocator) {
655                FX_DeleteAtAllocator(content, pAllocator, CXML_Content);
656            } else {
657                delete content;
658            }
659        } else if (type == Element) {
660            CXML_Element* child = (CXML_Element*)m_Children.GetAt(i + 1);
661            child->RemoveChildren();
662            if (pAllocator) {
663                FX_DeleteAtAllocator(child, pAllocator, CXML_Element);
664            } else {
665                delete child;
666            }
667        }
668    }
669    m_Children.RemoveAll();
670}
671CFX_ByteString CXML_Element::GetTagName(FX_BOOL bQualified) const
672{
673    if (!bQualified || m_QSpaceName.IsEmpty()) {
674        return m_TagName;
675    }
676    CFX_ByteString bsTag = m_QSpaceName;
677    bsTag += ":";
678    bsTag += m_TagName;
679    return bsTag;
680}
681void CXML_Element::GetTagName(CFX_ByteStringL &tagName, FX_BOOL bQualified) const
682{
683    IFX_Allocator* pAllocator = m_Children.m_pAllocator;
684    if (!bQualified || m_QSpaceName.IsEmpty()) {
685        tagName.Set(m_TagName, pAllocator);
686        return;
687    }
688    FX_LPSTR str = tagName.AllocBuffer(m_QSpaceName.GetLength() + m_TagName.GetLength() + 2, pAllocator);
689    if (!str) {
690        return;
691    }
692    FXSYS_memcpy32(str, m_QSpaceName.GetCStr(), m_QSpaceName.GetLength());
693    str += m_QSpaceName.GetLength();
694    *str = ':';
695    str ++;
696    FXSYS_memcpy32(str, m_TagName.GetCStr(), m_TagName.GetLength());
697    str += m_TagName.GetLength();
698    *str = '\0';
699}
700CFX_ByteString CXML_Element::GetNamespace(FX_BOOL bQualified) const
701{
702    if (bQualified) {
703        return m_QSpaceName;
704    }
705    return GetNamespaceURI(m_QSpaceName);
706}
707void CXML_Element::GetNamespace(CFX_ByteStringL &nameSpace, FX_BOOL bQualified) const
708{
709    IFX_Allocator* pAllocator = m_Children.m_pAllocator;
710    if (bQualified) {
711        nameSpace.Set(m_QSpaceName, pAllocator);
712        return;
713    }
714    GetNamespaceURI(m_QSpaceName, nameSpace);
715}
716CFX_ByteString CXML_Element::GetNamespaceURI(FX_BSTR qName) const
717{
718    const CFX_WideStringL* pwsSpace;
719    const CXML_Element *pElement = this;
720    do {
721        if (qName.IsEmpty()) {
722            pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC(""), FX_BSTRC("xmlns"));
723        } else {
724            pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC("xmlns"), qName);
725        }
726        if (pwsSpace) {
727            break;
728        }
729        pElement = pElement->GetParent();
730    } while(pElement);
731    return pwsSpace ? FX_UTF8Encode(*pwsSpace) : CFX_ByteString();
732}
733void CXML_Element::GetNamespaceURI(FX_BSTR qName, CFX_ByteStringL &uri) const
734{
735    IFX_Allocator* pAllocator = m_Children.m_pAllocator;
736    const CFX_WideStringL* pwsSpace;
737    const CXML_Element *pElement = this;
738    do {
739        if (qName.IsEmpty()) {
740            pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC(""), FX_BSTRC("xmlns"));
741        } else {
742            pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC("xmlns"), qName);
743        }
744        if (pwsSpace) {
745            break;
746        }
747        pElement = pElement->GetParent();
748    } while(pElement);
749    if (pwsSpace) {
750        FX_UTF8Encode(pwsSpace->GetPtr(), pwsSpace->GetLength(), uri, pAllocator);
751    }
752}
753void CXML_Element::GetAttrByIndex(int index, CFX_ByteString& space, CFX_ByteString& name, CFX_WideString& value) const
754{
755    if (index < 0 || index >= m_AttrMap.GetSize()) {
756        return;
757    }
758    CXML_AttrItem& item = m_AttrMap.GetAt(index);
759    space = item.m_QSpaceName;
760    name = item.m_AttrName;
761    value = item.m_Value;
762}
763void CXML_Element::GetAttrByIndex(int index, CFX_ByteStringL &space, CFX_ByteStringL &name, CFX_WideStringL &value) const
764{
765    if (index < 0 || index >= m_AttrMap.GetSize()) {
766        return;
767    }
768    IFX_Allocator* pAllocator = m_Children.m_pAllocator;
769    CXML_AttrItem& item = m_AttrMap.GetAt(index);
770    space.Set(item.m_QSpaceName, pAllocator);
771    name.Set(item.m_AttrName, pAllocator);
772    value.Set(item.m_Value, pAllocator);
773}
774FX_BOOL CXML_Element::HasAttr(FX_BSTR name) const
775{
776    CFX_ByteStringC bsSpace, bsName;
777    FX_XML_SplitQualifiedName(name, bsSpace, bsName);
778    return m_AttrMap.Lookup(bsSpace, bsName) != NULL;
779}
780FX_BOOL CXML_Element::GetAttrValue(FX_BSTR name, CFX_WideString& attribute) const
781{
782    CFX_ByteStringC bsSpace, bsName;
783    FX_XML_SplitQualifiedName(name, bsSpace, bsName);
784    const CFX_WideStringL* pValue = m_AttrMap.Lookup(bsSpace, bsName);
785    if (pValue) {
786        attribute = CFX_WideString(pValue->GetPtr(), pValue->GetLength());
787        return TRUE;
788    }
789    return FALSE;
790}
791const CFX_WideStringL* CXML_Element::GetAttrValuePtr(FX_BSTR name) const
792{
793    CFX_ByteStringC bsSpace, bsName;
794    FX_XML_SplitQualifiedName(name, bsSpace, bsName);
795    return m_AttrMap.Lookup(bsSpace, bsName);
796}
797FX_BOOL CXML_Element::GetAttrValue(FX_BSTR space, FX_BSTR name, CFX_WideString& attribute) const
798{
799    const CFX_WideStringL* pValue = m_AttrMap.Lookup(space, name);
800    if (pValue) {
801        attribute = CFX_WideString(pValue->GetPtr(), pValue->GetLength());
802        return TRUE;
803    }
804    return FALSE;
805}
806const CFX_WideStringL* CXML_Element::GetAttrValuePtr(FX_BSTR space, FX_BSTR name) const
807{
808    return m_AttrMap.Lookup(space, name);
809}
810FX_BOOL CXML_Element::GetAttrInteger(FX_BSTR name, int& attribute) const
811{
812    CFX_ByteStringC bsSpace, bsName;
813    FX_XML_SplitQualifiedName(name, bsSpace, bsName);
814    const CFX_WideStringL* pwsValue = m_AttrMap.Lookup(bsSpace, bsName);
815    if (pwsValue) {
816        attribute = pwsValue->GetInteger();
817        return TRUE;
818    }
819    return FALSE;
820}
821FX_BOOL	CXML_Element::GetAttrInteger(FX_BSTR space, FX_BSTR name, int& attribute) const
822{
823    const CFX_WideStringL* pwsValue = m_AttrMap.Lookup(space, name);
824    if (pwsValue) {
825        attribute = pwsValue->GetInteger();
826        return TRUE;
827    }
828    return FALSE;
829}
830FX_BOOL CXML_Element::GetAttrFloat(FX_BSTR name, FX_FLOAT& attribute) const
831{
832    CFX_ByteStringC bsSpace, bsName;
833    FX_XML_SplitQualifiedName(name, bsSpace, bsName);
834    return GetAttrFloat(bsSpace, bsName, attribute);
835}
836FX_BOOL CXML_Element::GetAttrFloat(FX_BSTR space, FX_BSTR name, FX_FLOAT& attribute) const
837{
838    CFX_WideString value;
839    const CFX_WideStringL* pValue = m_AttrMap.Lookup(space, name);
840    if (pValue) {
841        attribute = pValue->GetFloat();
842        return TRUE;
843    }
844    return FALSE;
845}
846FX_DWORD CXML_Element::CountChildren() const
847{
848    return m_Children.GetSize() / 2;
849}
850CXML_Element::ChildType CXML_Element::GetChildType(FX_DWORD index) const
851{
852    index <<= 1;
853    if (index >= (FX_DWORD)m_Children.GetSize()) {
854        return Invalid;
855    }
856    return (ChildType)(FX_UINTPTR)m_Children.GetAt(index);
857}
858CFX_WideString CXML_Element::GetContent(FX_DWORD index) const
859{
860    index <<= 1;
861    if (index >= (FX_DWORD)m_Children.GetSize() ||
862            (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Content) {
863        return CFX_WideString();
864    }
865    CXML_Content* pContent = (CXML_Content*)m_Children.GetAt(index + 1);
866    if (pContent) {
867        return pContent->m_Content;
868    }
869    return CFX_WideString();
870}
871const CFX_WideStringL* CXML_Element::GetContentPtr(FX_DWORD index) const
872{
873    index <<= 1;
874    if (index >= (FX_DWORD)m_Children.GetSize() ||
875            (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Content) {
876        return NULL;
877    }
878    CXML_Content* pContent = (CXML_Content*)m_Children.GetAt(index + 1);
879    if (pContent) {
880        return &pContent->m_Content;
881    }
882    return NULL;
883}
884CXML_Element* CXML_Element::GetElement(FX_DWORD index) const
885{
886    index <<= 1;
887    if (index >= (FX_DWORD)m_Children.GetSize() ||
888            (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Element) {
889        return NULL;
890    }
891    return (CXML_Element*)m_Children.GetAt(index + 1);
892}
893FX_DWORD CXML_Element::CountElements(FX_BSTR space, FX_BSTR tag) const
894{
895    int count = 0;
896    for (int i = 0; i < m_Children.GetSize(); i += 2) {
897        ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i);
898        if (type != Element) {
899            continue;
900        }
901        CXML_Element* pKid = (CXML_Element*)m_Children.GetAt(i + 1);
902        if ((space.IsEmpty() || pKid->m_QSpaceName == space) && pKid->m_TagName == tag) {
903            count ++;
904        }
905    }
906    return count;
907}
908CXML_Element* CXML_Element::GetElement(FX_BSTR space, FX_BSTR tag, int index) const
909{
910    if (index < 0) {
911        return NULL;
912    }
913    for (int i = 0; i < m_Children.GetSize(); i += 2) {
914        ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i);
915        if (type != Element) {
916            continue;
917        }
918        CXML_Element* pKid = (CXML_Element*)m_Children.GetAt(i + 1);
919        if ((!space.IsEmpty() && pKid->m_QSpaceName != space) || pKid->m_TagName != tag) {
920            continue;
921        }
922        if (index -- == 0) {
923            return pKid;
924        }
925    }
926    return NULL;
927}
928FX_DWORD CXML_Element::FindElement(CXML_Element *pChild) const
929{
930    for (int i = 0; i < m_Children.GetSize(); i += 2) {
931        if ((ChildType)(FX_UINTPTR)m_Children.GetAt(i) == Element &&
932                (CXML_Element*)m_Children.GetAt(i + 1) == pChild) {
933            return (FX_DWORD)(i >> 1);
934        }
935    }
936    return (FX_DWORD) - 1;
937}
938const CFX_WideStringL* CXML_AttrMap::Lookup(FX_BSTR space, FX_BSTR name) const
939{
940    if (m_pMap == NULL) {
941        return NULL;
942    }
943    for (int i = 0; i < m_pMap->GetSize(); i ++) {
944        CXML_AttrItem& item = GetAt(i);
945        if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) {
946            return &item.m_Value;
947        }
948    }
949    return NULL;
950}
951void CXML_AttrMap::SetAt(FX_BSTR space, FX_BSTR name, FX_WSTR value, IFX_Allocator* pAllocator)
952{
953    for (int i = 0; i < GetSize(); i ++) {
954        CXML_AttrItem& item = GetAt(i);
955        if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) {
956            item.m_Value.Set(value, pAllocator);
957            return;
958        }
959    }
960    if (!m_pMap) {
961        if (pAllocator) {
962            m_pMap = FX_NewAtAllocator(pAllocator)CFX_ObjectArray<CXML_AttrItem>(pAllocator);
963        } else {
964            m_pMap = FX_NEW CFX_ObjectArray<CXML_AttrItem>;
965        }
966    }
967    if (!m_pMap) {
968        return;
969    }
970    CXML_AttrItem* pItem = (CXML_AttrItem*)m_pMap->AddSpace();
971    if (!pItem) {
972        return;
973    }
974    pItem->m_QSpaceName.Set(space, pAllocator);
975    pItem->m_AttrName.Set(name, pAllocator);
976    pItem->m_Value.Set(value, pAllocator);
977}
978void CXML_AttrMap::RemoveAt(FX_BSTR space, FX_BSTR name, IFX_Allocator* pAllocator)
979{
980    if (m_pMap == NULL) {
981        return;
982    }
983    for (int i = 0; i < m_pMap->GetSize(); i ++) {
984        CXML_AttrItem& item = GetAt(i);
985        if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) {
986            item.Empty(pAllocator);
987            m_pMap->RemoveAt(i);
988            return;
989        }
990    }
991}
992int CXML_AttrMap::GetSize() const
993{
994    return m_pMap == NULL ? 0 : m_pMap->GetSize();
995}
996CXML_AttrItem& CXML_AttrMap::GetAt(int index) const
997{
998    ASSERT(m_pMap != NULL);
999    return (*m_pMap)[index];
1000}
1001void CXML_AttrMap::RemoveAll(IFX_Allocator* pAllocator)
1002{
1003    if (!m_pMap) {
1004        return;
1005    }
1006    for (int i = 0; i < m_pMap->GetSize(); i ++) {
1007        CXML_AttrItem& item = (*m_pMap)[i];
1008        item.Empty(pAllocator);
1009    }
1010    m_pMap->RemoveAll();
1011    if (pAllocator) {
1012        FX_DeleteAtAllocator(m_pMap, pAllocator, CFX_ObjectArray<CXML_AttrItem>);
1013    } else {
1014        delete m_pMap;
1015    }
1016    m_pMap = NULL;
1017}
1018