// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#ifndef XFA_FDE_XML_CFX_SAXREADER_H_
#define XFA_FDE_XML_CFX_SAXREADER_H_

#include <stdint.h>

#include <memory>
#include <stack>

#include "core/fxcrt/fx_basic.h"

class CXFA_SAXContext;

// A single parse item. CFX_SAXReader keeps these, owned through
// std::unique_ptr, on its m_Stack member.
class CFX_SAXItem {
 public:
  // Kind of XML construct an item stands for. Enumerators are numbered
  // consecutively starting at Unknown == 0.
  enum class Type {
    Unknown = 0,
    Instruction,
    Declaration,
    Comment,
    Tag,
    Text,
    CharData,
  };

  // Creates an item identified by |id|. Every other field starts from its
  // in-class default: no context node, Type::Unknown, not skipped.
  explicit CFX_SAXItem(uint32_t id) : m_dwID(id) {}

  // Context attached to this item; null until something assigns it.
  // NOTE(review): ownership of the pointee is not visible in this header.
  CXFA_SAXContext* m_pNode = nullptr;
  // What kind of construct this item represents.
  Type m_eNode = Type::Unknown;
  // Identifier supplied at construction; immutable afterwards.
  const uint32_t m_dwID;
  // Skip flag; starts out false.
  bool m_bSkip = false;
};

384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass CFX_SAXFile {
394d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann public:
404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CFX_SAXFile();
414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  ~CFX_SAXFile();
424d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  bool StartFile(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile,
444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann                 uint32_t dwStart,
454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann                 uint32_t dwLen);
464d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  bool ReadNextBlock();
474d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void Reset();
484d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
494d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CFX_RetainPtr<IFX_SeekableReadStream> m_pFile;
504d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwStart;
514d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwEnd;
524d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwCur;
534d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint8_t* m_pBuf;
544d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwBufSize;
554d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwBufIndex;
564d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann};
574d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
// Forward declarations for types that CFX_SAXReader stores as members: a
// pointer to the comment context and two values of the mode enum.
class CFX_SAXCommentContext;
enum class CFX_SaxMode;

// Parse-mode bit flags. Each enumerator is a distinct single bit, so several
// may be combined with bitwise OR into one integer.
// NOTE(review): presumably the values accepted by the |dwParseMode| argument
// of CFX_SAXReader::StartParse - confirm against the implementation.
enum CFX_SaxParseMode {
  CFX_SaxParseMode_NotConvert_amp = 1 << 0,
  CFX_SaxParseMode_NotConvert_lt = 1 << 1,
  CFX_SaxParseMode_NotConvert_gt = 1 << 2,
  CFX_SaxParseMode_NotConvert_apos = 1 << 3,
  CFX_SaxParseMode_NotConvert_quot = 1 << 4,
  CFX_SaxParseMode_NotConvert_sharp = 1 << 5,
  CFX_SaxParseMode_NotSkipSpace = 1 << 6
};

714d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass CXFA_SAXReaderHandler;
724d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
734d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmannclass CFX_SAXReader {
744d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann public:
754d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CFX_SAXReader();
764d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  ~CFX_SAXReader();
774d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
784d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t StartParse(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile,
794d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann                     uint32_t dwStart = 0,
804d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann                     uint32_t dwLen = -1,
814d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann                     uint32_t dwParseMode = 0);
824d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t ContinueParse(IFX_Pause* pPause = nullptr);
834d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void SkipCurrentNode();
844d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void SetHandler(CXFA_SAXReaderHandler* pHandler);
854d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void AppendData(uint8_t ch);
864d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void AppendName(uint8_t ch);
874d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseText();
884d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseNodeStart();
894d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseInstruction();
904d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseDeclOrComment();
914d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseDeclNode();
924d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseComment();
934d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseCommentContent();
944d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseTagName();
954d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseTagAttributeName();
964d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseTagAttributeEqual();
974d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseTagAttributeValue();
984d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseMaybeClose();
994d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseTagClose();
1004d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseTagEnd();
1014d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseTargetData();
1024d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1034d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann private:
1044d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void Reset();
1054d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void Push();
1064d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void Pop();
1074d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CFX_SAXItem* GetCurrentItem() const;
1084d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  bool SkipSpace(uint8_t ch);
1094d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void SkipNode();
1104d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void NotifyData();
1114d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void NotifyEnter();
1124d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void NotifyAttribute();
1134d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void NotifyBreak();
1144d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void NotifyClose();
1154d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void NotifyEnd();
1164d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void NotifyTargetData();
1174d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ReallocDataBuffer();
1184d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ReallocNameBuffer();
1194d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  void ParseChar(uint8_t ch);
1204d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
1214d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CFX_SAXFile m_File;
1224d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CXFA_SAXReaderHandler* m_pHandler;
1234d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t m_iState;
1244d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  std::stack<std::unique_ptr<CFX_SAXItem>> m_Stack;
1254d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwItemID;
1264d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CFX_SaxMode m_eMode;
1274d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CFX_SaxMode m_ePrevMode;
1284d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  bool m_bCharData;
1294d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint8_t m_CurByte;
1304d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwDataOffset;
13133357cad1fd1321a2b38d2963e2585f27ce980a2Philip P. Moltmann  CFX_ArrayTemplate<uint8_t> m_SkipStack;
1324d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint8_t m_SkipChar;
1334d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwNodePos;
1344d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint8_t* m_pszData;
1354d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t m_iDataSize;
1364d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t m_iDataLength;
1374d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t m_iEntityStart;
1384d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t m_iDataPos;
1394d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint8_t* m_pszName;
1404d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t m_iNameSize;
1414d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  int32_t m_iNameLength;
1424d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  uint32_t m_dwParseMode;
1434d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann  CFX_SAXCommentContext* m_pCommentContext;
1444d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann};
1454d3acf4ec42bf6e838f9060103aff98fbf170794Philip P. Moltmann
#endif  // XFA_FDE_XML_CFX_SAXREADER_H_