1ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov// Copyright 2014 PDFium Authors. All rights reserved.
2ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov// Use of this source code is governed by a BSD-style license that can be
3ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov// found in the LICENSE file.
4ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
5ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
7ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#ifndef _FXCRT_XML_INT_
8ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#define _FXCRT_XML_INT_
9ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CXML_DataBufAcc : public IFX_BufferRead, public CFX_Object
10ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
11ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
12ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CXML_DataBufAcc(FX_LPCBYTE pBuffer, size_t size, IFX_Allocator* pAllocator = NULL)
13ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        : m_pAllocator(pAllocator)
14ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        , m_pBuffer(pBuffer)
15ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        , m_dwSize(size)
16ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        , m_dwCurPos(0)
17ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
18ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
19ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual ~CXML_DataBufAcc() {}
20ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual void			Release()
21ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
22ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (m_pAllocator) {
23ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            FX_DeleteAtAllocator(this, m_pAllocator, CXML_DataBufAcc);
24ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        } else {
25ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            delete this;
26ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
27ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
28ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_BOOL			IsEOF()
29ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
30ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_dwCurPos >= m_dwSize;
31ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
32ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_FILESIZE		GetPosition()
33ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
34ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return (FX_FILESIZE)m_dwCurPos;
35ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
36ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual size_t			ReadBlock(void* buffer, size_t size)
37ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
38ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return 0;
39ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
40ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_BOOL			ReadNextBlock(FX_BOOL bRestart = FALSE)
41ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
42ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (bRestart) {
43ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            m_dwCurPos = 0;
44ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
45ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (m_dwCurPos < m_dwSize) {
46ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            m_dwCurPos = m_dwSize;
47ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return TRUE;
48ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
49ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return FALSE;
50ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
51ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_LPCBYTE		GetBlockBuffer()
52ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
53ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_pBuffer;
54ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
55ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual size_t			GetBlockSize()
56ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
57ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_dwSize;
58ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
59ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_FILESIZE		GetBlockOffset()
60ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
61ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return 0;
62ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
63ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovprotected:
64ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    IFX_Allocator*	m_pAllocator;
65ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_LPCBYTE		m_pBuffer;
66ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    size_t			m_dwSize;
67ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    size_t			m_dwCurPos;
68ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
69ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#define FX_XMLDATASTREAM_BufferSize		(32 * 1024)
70ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CXML_DataStmAcc : public IFX_BufferRead, public CFX_Object
71ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
72ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
73ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CXML_DataStmAcc(IFX_FileRead *pFileRead, IFX_Allocator* pAllocator = NULL)
74ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        : m_pAllocator(pAllocator)
75ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        , m_pFileRead(pFileRead)
76ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        , m_pBuffer(NULL)
77ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        , m_nStart(0)
78ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        , m_dwSize(0)
79ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
80ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        FXSYS_assert(m_pFileRead != NULL);
81ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
82ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual ~CXML_DataStmAcc()
83ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
84ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (m_pBuffer) {
85ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            FX_Allocator_Free(m_pAllocator, m_pBuffer);
86ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
87ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
88ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual void			Release()
89ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
90ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (m_pAllocator) {
91ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            FX_DeleteAtAllocator(this, m_pAllocator, CXML_DataStmAcc);
92ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        } else {
93ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            delete this;
94ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
95ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
96ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_BOOL			IsEOF()
97ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
98ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_nStart + (FX_FILESIZE)m_dwSize >= m_pFileRead->GetSize();
99ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
100ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_FILESIZE		GetPosition()
101ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
102ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_nStart + (FX_FILESIZE)m_dwSize;
103ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
104ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual size_t			ReadBlock(void* buffer, size_t size)
105ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
106ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return 0;
107ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
108ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_BOOL			ReadNextBlock(FX_BOOL bRestart = FALSE)
109ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
110ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (bRestart) {
111ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            m_nStart = 0;
112ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
113ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        FX_FILESIZE nLength = m_pFileRead->GetSize();
114ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        m_nStart += (FX_FILESIZE)m_dwSize;
115ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (m_nStart >= nLength) {
116ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return FALSE;
117ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
118ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        m_dwSize = (size_t)FX_MIN(FX_XMLDATASTREAM_BufferSize, nLength - m_nStart);
119ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (!m_pBuffer) {
120ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            m_pBuffer = FX_Allocator_Alloc(m_pAllocator, FX_BYTE, m_dwSize);
121ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            if (!m_pBuffer) {
122ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                return FALSE;
123ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            }
124ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
125ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_pFileRead->ReadBlock(m_pBuffer, m_nStart, m_dwSize);
126ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
127ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_LPCBYTE		GetBlockBuffer()
128ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
129ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return (FX_LPCBYTE)m_pBuffer;
130ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
131ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual size_t			GetBlockSize()
132ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
133ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_dwSize;
134ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
135ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_FILESIZE		GetBlockOffset()
136ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
137ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_nStart;
138ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
139ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovprotected:
140ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    IFX_Allocator*	m_pAllocator;
141ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    IFX_FileRead	*m_pFileRead;
142ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_LPBYTE		m_pBuffer;
143ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_FILESIZE		m_nStart;
144ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    size_t			m_dwSize;
145ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
146ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CXML_Parser
147ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
148ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
149ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CXML_Parser(IFX_Allocator* pAllocator = NULL) : m_pAllocator(pAllocator) {}
150ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    ~CXML_Parser();
151ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    IFX_Allocator*	m_pAllocator;
152ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    IFX_BufferRead*	m_pDataAcc;
153ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			m_bOwnedStream;
154ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_FILESIZE		m_nOffset;
155ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			m_bSaveSpaceChars;
156ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_LPCBYTE		m_pBuffer;
157ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    size_t			m_dwBufferSize;
158ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_FILESIZE		m_nBufferOffset;
159ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    size_t			m_dwIndex;
160ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			Init(FX_LPBYTE pBuffer, size_t size);
161ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			Init(IFX_FileRead *pFileRead);
162ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			Init(IFX_BufferRead *pBuffer);
163ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			Init(FX_BOOL bOwndedStream);
164ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			ReadNextBlock();
165ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			IsEOF();
166ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			HaveAvailData();
167ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void			SkipWhiteSpaces();
168ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void			GetName(CFX_ByteStringL &space, CFX_ByteStringL &name);
169ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void			GetAttrValue(CFX_WideStringL &value);
170ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_DWORD		GetCharRef();
171ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void			GetTagName(CFX_ByteStringL &space, CFX_ByteStringL &name, FX_BOOL &bEndTag, FX_BOOL bStartTag = FALSE);
172ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void			SkipLiterals(FX_BSTR str);
173ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CXML_Element*	ParseElement(CXML_Element* pParent, FX_BOOL bStartTag = FALSE);
174ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void			InsertContentSegment(FX_BOOL bCDATA, FX_WSTR content, CXML_Element* pElement);
175ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void			InsertCDATASegment(CFX_UTF8Decoder& decoder, CXML_Element* pElement);
176ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
// Takes a full name (bsFullName) and reports two parts of it through the
// output references bsSpace and bsName. Defined outside this header.
// NOTE(review): presumably splits an XML qualified name of the form
// "space:name" at the colon - confirm against the definition.
void FX_XML_SplitQualifiedName(FX_BSTR bsFullName, CFX_ByteStringC &bsSpace, CFX_ByteStringC &bsName);
178ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#endif
179