// fde_xml_imp.h revision 4d3acf4ec42bf6e838f9060103aff98fbf170794
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7#ifndef XFA_FDE_XML_FDE_XML_IMP_H_
8#define XFA_FDE_XML_FDE_XML_IMP_H_
9
10#include <memory>
11#include <vector>
12
13#include "core/fxcrt/fx_system.h"
14#include "xfa/fde/xml/fde_xml.h"
15#include "xfa/fgas/crt/fgas_stream.h"
16#include "xfa/fgas/crt/fgas_utils.h"
17
// Forward declarations of the XML classes named in this header.
class CFDE_BlockBuffer;
class CFDE_XMLInstruction;
class CFDE_XMLElement;
class CFDE_XMLText;
class CFDE_XMLDoc;
class CFDE_XMLDOMParser;
class CFDE_XMLSyntaxParser;
class IFDE_XMLParser;
26
27class CFDE_XMLNode {
28 public:
29  enum NodeItem {
30    Root = 0,
31    Parent,
32    FirstSibling,
33    PriorSibling,
34    NextSibling,
35    LastSibling,
36    FirstNeighbor,
37    PriorNeighbor,
38    NextNeighbor,
39    LastNeighbor,
40    FirstChild,
41    LastChild
42  };
43
44  CFDE_XMLNode();
45  virtual ~CFDE_XMLNode();
46
47  virtual void Release();
48  virtual FDE_XMLNODETYPE GetType() const;
49  virtual CFDE_XMLNode* Clone(bool bRecursive);
50
51  int32_t CountChildNodes() const;
52  CFDE_XMLNode* GetChildNode(int32_t index) const;
53  int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const;
54  int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1);
55  void RemoveChildNode(CFDE_XMLNode* pNode);
56  void DeleteChildren();
57  void CloneChildren(CFDE_XMLNode* pClone);
58
59  CFDE_XMLNode* GetPath(const FX_WCHAR* pPath,
60                        int32_t iLength = -1,
61                        bool bQualifiedName = true) const;
62
63  int32_t GetNodeLevel() const;
64  CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const;
65  bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode);
66  CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem);
67
68  void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream);
69
70  CFDE_XMLNode* m_pParent;
71  CFDE_XMLNode* m_pChild;
72  CFDE_XMLNode* m_pPrior;
73  CFDE_XMLNode* m_pNext;
74};
75
76class CFDE_XMLInstruction : public CFDE_XMLNode {
77 public:
78  explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget);
79  ~CFDE_XMLInstruction() override;
80
81  // CFDE_XMLNode
82  void Release() override;
83  FDE_XMLNODETYPE GetType() const override;
84  CFDE_XMLNode* Clone(bool bRecursive) override;
85
86  void GetTargetName(CFX_WideString& wsTarget) const { wsTarget = m_wsTarget; }
87  int32_t CountAttributes() const;
88  bool GetAttribute(int32_t index,
89                    CFX_WideString& wsAttriName,
90                    CFX_WideString& wsAttriValue) const;
91  bool HasAttribute(const FX_WCHAR* pwsAttriName) const;
92  void GetString(const FX_WCHAR* pwsAttriName,
93                 CFX_WideString& wsAttriValue,
94                 const FX_WCHAR* pwsDefValue = nullptr) const;
95  void SetString(const CFX_WideString& wsAttriName,
96                 const CFX_WideString& wsAttriValue);
97  int32_t GetInteger(const FX_WCHAR* pwsAttriName, int32_t iDefValue = 0) const;
98  void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue);
99  FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fDefValue = 0) const;
100  void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue);
101  void RemoveAttribute(const FX_WCHAR* pwsAttriName);
102  int32_t CountData() const;
103  bool GetData(int32_t index, CFX_WideString& wsData) const;
104  void AppendData(const CFX_WideString& wsData);
105  void RemoveData(int32_t index);
106
107  CFX_WideString m_wsTarget;
108  std::vector<CFX_WideString> m_Attributes;
109  std::vector<CFX_WideString> m_TargetData;
110};
111
112class CFDE_XMLElement : public CFDE_XMLNode {
113 public:
114  explicit CFDE_XMLElement(const CFX_WideString& wsTag);
115  ~CFDE_XMLElement() override;
116
117  // CFDE_XMLNode
118  void Release() override;
119  FDE_XMLNODETYPE GetType() const override;
120  CFDE_XMLNode* Clone(bool bRecursive) override;
121
122  void GetTagName(CFX_WideString& wsTag) const;
123  void GetLocalTagName(CFX_WideString& wsTag) const;
124
125  void GetNamespacePrefix(CFX_WideString& wsPrefix) const;
126  void GetNamespaceURI(CFX_WideString& wsNamespace) const;
127
128  int32_t CountAttributes() const;
129  bool GetAttribute(int32_t index,
130                    CFX_WideString& wsAttriName,
131                    CFX_WideString& wsAttriValue) const;
132  bool HasAttribute(const FX_WCHAR* pwsAttriName) const;
133  void RemoveAttribute(const FX_WCHAR* pwsAttriName);
134
135  void GetString(const FX_WCHAR* pwsAttriName,
136                 CFX_WideString& wsAttriValue,
137                 const FX_WCHAR* pwsDefValue = nullptr) const;
138  void SetString(const CFX_WideString& wsAttriName,
139                 const CFX_WideString& wsAttriValue);
140
141  int32_t GetInteger(const FX_WCHAR* pwsAttriName, int32_t iDefValue = 0) const;
142  void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue);
143
144  FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fDefValue = 0) const;
145  void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue);
146
147  void GetTextData(CFX_WideString& wsText) const;
148  void SetTextData(const CFX_WideString& wsText);
149
150  CFX_WideString m_wsTag;
151  std::vector<CFX_WideString> m_Attributes;
152};
153
154class CFDE_XMLText : public CFDE_XMLNode {
155 public:
156  explicit CFDE_XMLText(const CFX_WideString& wsText);
157  ~CFDE_XMLText() override;
158
159  // CFDE_XMLNode
160  void Release() override;
161  FDE_XMLNODETYPE GetType() const override;
162  CFDE_XMLNode* Clone(bool bRecursive) override;
163
164  void GetText(CFX_WideString& wsText) const { wsText = m_wsText; }
165  void SetText(const CFX_WideString& wsText) { m_wsText = wsText; }
166
167  CFX_WideString m_wsText;
168};
169
170class CFDE_XMLDeclaration : public CFDE_XMLNode {
171 public:
172  CFDE_XMLDeclaration() {}
173  ~CFDE_XMLDeclaration() override {}
174};
175
176class CFDE_XMLCharData : public CFDE_XMLDeclaration {
177 public:
178  explicit CFDE_XMLCharData(const CFX_WideString& wsCData);
179  ~CFDE_XMLCharData() override;
180
181  void Release() override;
182  FDE_XMLNODETYPE GetType() const override;
183  CFDE_XMLNode* Clone(bool bRecursive) override;
184
185  void GetCharData(CFX_WideString& wsCharData) const {
186    wsCharData = m_wsCharData;
187  }
188  void SetCharData(const CFX_WideString& wsCData) { m_wsCharData = wsCData; }
189
190  CFX_WideString m_wsCharData;
191};
192
193class CFDE_XMLDoc {
194 public:
195  CFDE_XMLDoc();
196  ~CFDE_XMLDoc();
197
198  bool LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser);
199  int32_t DoLoad(IFX_Pause* pPause = nullptr);
200  void CloseXML();
201  CFDE_XMLNode* GetRoot() const { return m_pRoot; }
202  void SaveXML(CFX_RetainPtr<IFGAS_Stream>& pXMLStream, bool bSaveBOM = true);
203  void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
204                   CFDE_XMLNode* pNode);
205
206 protected:
207  void Reset(bool bInitRoot);
208  void ReleaseParser();
209
210  CFX_RetainPtr<IFGAS_Stream> m_pStream;
211  int32_t m_iStatus;
212  CFDE_XMLNode* m_pRoot;
213  CFDE_XMLSyntaxParser* m_pSyntaxParser;
214  std::unique_ptr<IFDE_XMLParser> m_pXMLParser;
215};
216
217class IFDE_XMLParser {
218 public:
219  virtual ~IFDE_XMLParser() {}
220  virtual int32_t DoParser(IFX_Pause* pPause) = 0;
221};
222
223class CFDE_BlockBuffer {
224 public:
225  explicit CFDE_BlockBuffer(int32_t iAllocStep = 1024 * 1024);
226  ~CFDE_BlockBuffer();
227
228  bool InitBuffer(int32_t iBufferSize = 1024 * 1024);
229  bool IsInitialized() { return m_iBufferSize / m_iAllocStep >= 1; }
230  void ReleaseBuffer() { delete this; }
231  FX_WCHAR* GetAvailableBlock(int32_t& iIndexInBlock);
232  inline int32_t GetAllocStep() const { return m_iAllocStep; }
233  inline int32_t& GetDataLengthRef() { return m_iDataLength; }
234  inline void Reset(bool bReserveData = true) {
235    if (!bReserveData) {
236      m_iStartPosition = 0;
237    }
238    m_iDataLength = 0;
239  }
240  void SetTextChar(int32_t iIndex, FX_WCHAR ch);
241  int32_t DeleteTextChars(int32_t iCount, bool bDirection = true);
242  void GetTextData(CFX_WideString& wsTextData,
243                   int32_t iStart = 0,
244                   int32_t iLength = -1) const;
245
246 protected:
247  inline void TextDataIndex2BufIndex(const int32_t iIndex,
248                                     int32_t& iBlockIndex,
249                                     int32_t& iInnerIndex) const;
250  void ClearBuffer();
251
252  CFX_ArrayTemplate<FX_WCHAR*> m_BlockArray;
253  int32_t m_iDataLength;
254  int32_t m_iBufferSize;
255  int32_t m_iAllocStep;
256  int32_t m_iStartPosition;
257};
258
259class CFDE_XMLSyntaxParser {
260 public:
261  CFDE_XMLSyntaxParser();
262  ~CFDE_XMLSyntaxParser();
263
264  void Release() { delete this; }
265  void Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
266            int32_t iXMLPlaneSize,
267            int32_t iTextDataSize = 256);
268
269  FDE_XmlSyntaxResult DoSyntaxParse();
270
271  int32_t GetStatus() const;
272  int32_t GetCurrentPos() const {
273    return m_iParsedChars + (m_pStart - m_pBuffer);
274  }
275  FX_FILESIZE GetCurrentBinaryPos() const;
276  int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
277  int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
278
279  void GetTargetName(CFX_WideString& wsTarget) const {
280    m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength);
281  }
282  void GetTagName(CFX_WideString& wsTag) const {
283    m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength);
284  }
285  void GetAttributeName(CFX_WideString& wsAttriName) const {
286    m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength);
287  }
288  void GetAttributeValue(CFX_WideString& wsAttriValue) const {
289    m_BlockBuffer.GetTextData(wsAttriValue, 0, m_iTextDataLength);
290  }
291  void GetTextData(CFX_WideString& wsText) const {
292    m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength);
293  }
294  void GetTargetData(CFX_WideString& wsData) const {
295    m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength);
296  }
297
298 protected:
299  enum class FDE_XmlSyntaxState {
300    Text,
301    Node,
302    Target,
303    Tag,
304    AttriName,
305    AttriEqualSign,
306    AttriQuotation,
307    AttriValue,
308    Entity,
309    EntityDecimal,
310    EntityHex,
311    CloseInstruction,
312    BreakElement,
313    CloseElement,
314    SkipDeclNode,
315    DeclCharData,
316    SkipComment,
317    SkipCommentOrDecl,
318    SkipCData,
319    TargetData
320  };
321
322  void ParseTextChar(FX_WCHAR ch);
323
324  CFX_RetainPtr<IFGAS_Stream> m_pStream;
325  int32_t m_iXMLPlaneSize;
326  int32_t m_iCurrentPos;
327  int32_t m_iCurrentNodeNum;
328  int32_t m_iLastNodeNum;
329  int32_t m_iParsedChars;
330  int32_t m_iParsedBytes;
331  FX_WCHAR* m_pBuffer;
332  int32_t m_iBufferChars;
333  bool m_bEOS;
334  FX_WCHAR* m_pStart;
335  FX_WCHAR* m_pEnd;
336  FDE_XMLNODE m_CurNode;
337  CFX_StackTemplate<FDE_XMLNODE> m_XMLNodeStack;
338  CFDE_BlockBuffer m_BlockBuffer;
339  int32_t m_iAllocStep;
340  int32_t& m_iDataLength;
341  FX_WCHAR* m_pCurrentBlock;
342  int32_t m_iIndexInBlock;
343  int32_t m_iTextDataLength;
344  FDE_XmlSyntaxResult m_syntaxParserResult;
345  FDE_XmlSyntaxState m_syntaxParserState;
346  FX_WCHAR m_wQuotationMark;
347  int32_t m_iEntityStart;
348  CFX_StackTemplate<uint32_t> m_SkipStack;
349  FX_WCHAR m_SkipChar;
350};
351
352#endif  // XFA_FDE_XML_FDE_XML_IMP_H_
353