// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
7d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <algorithm>
8d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <memory>
9d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <sstream>
10d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <string>
11d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <utility>
12d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <vector>
13d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
14d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/cfx_utf8decoder.h"
15d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/cfx_widetextbuf.h"
16d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/fx_extension.h"
17d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/xml/cxml_content.h"
18d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/xml/cxml_element.h"
19d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/xml/cxml_parser.h"
20d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "third_party/base/ptr_util.h"
21d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "third_party/base/stl_util.h"
22d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
23d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannnamespace {
24d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
// Character-class flags. Every entry of g_FXCRT_XML_ByteTypes is a bitwise
// combination of these values.
#define FXCRTM_XML_CHARTYPE_Normal 0x00
#define FXCRTM_XML_CHARTYPE_SpaceChar 0x01
#define FXCRTM_XML_CHARTYPE_Letter 0x02
#define FXCRTM_XML_CHARTYPE_Digital 0x04
#define FXCRTM_XML_CHARTYPE_NameIntro 0x08
#define FXCRTM_XML_CHARTYPE_NameChar 0x10
// The two hex bits. Masking a table entry with HexChar yields HexDigital for
// '0'-'9', HexLowerLetter for 'a'-'f' and HexUpperLetter (both bits) for
// 'A'-'F'.
#define FXCRTM_XML_CHARTYPE_HexDigital 0x20
#define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40
#define FXCRTM_XML_CHARTYPE_HexUpperLetter \
  (FXCRTM_XML_CHARTYPE_HexDigital | FXCRTM_XML_CHARTYPE_HexLowerLetter)
#define FXCRTM_XML_CHARTYPE_HexChar \
  (FXCRTM_XML_CHARTYPE_HexDigital | FXCRTM_XML_CHARTYPE_HexLowerLetter)
35d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
// Character classes for every possible byte value, indexed by the byte. Each
// entry is a combination of the FXCRTM_XML_CHARTYPE_* flags. Laid out eight
// entries per row; the comment on each row gives the index range it covers.
const uint8_t g_FXCRT_XML_ByteTypes[256] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x00 - 0x07
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x08 - 0x0F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x10 - 0x17
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x18 - 0x1F
    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20 - 0x27 (' ' first)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00,  // 0x28 - 0x2F ('-', '.')
    0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34,  // 0x30 - 0x37 ('0'-'7')
    0x34, 0x34, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38 - 0x3F ('8','9',':')
    0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A,  // 0x40 - 0x47 ('A'-'F','G')
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x48 - 0x4F
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x50 - 0x57
    0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18,  // 0x58 - 0x5F ('_' last)
    0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A,  // 0x60 - 0x67 ('a'-'f','g')
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x68 - 0x6F
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x70 - 0x77
    0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 - 0x7F
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x80 - 0x87
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x88 - 0x8F
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x90 - 0x97
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x98 - 0x9F
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xA0 - 0xA7
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xA8 - 0xAF
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xB0 - 0xB7
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xB8 - 0xBF
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xC0 - 0xC7
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xC8 - 0xCF
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xD0 - 0xD7
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xD8 - 0xDF
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xE0 - 0xE7
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xE8 - 0xEF
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xF0 - 0xF7
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x01, 0x01,  // 0xF8 - 0xFF
};
60d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
// Nesting limit checked by CXML_Parser::ParseElementInternal(): a call whose
// depth argument exceeds this value returns nullptr instead of parsing.
constexpr int kMaxDepth = 1024;
62d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
63d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) {
64d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar);
65d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
66d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
67d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool g_FXCRT_XML_IsDigital(uint8_t ch) {
68d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital);
69d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
70d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
71d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool g_FXCRT_XML_IsNameIntro(uint8_t ch) {
72d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro);
73d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
74d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
75d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool g_FXCRT_XML_IsNameChar(uint8_t ch) {
76d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar);
77d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
78d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
79d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}  // namespace
80d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
81d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. MoltmannCXML_Parser::CXML_Parser()
82d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    : m_nOffset(0),
83d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      m_pBuffer(nullptr),
84d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      m_dwBufferSize(0),
85d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      m_nBufferOffset(0),
86d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      m_dwIndex(0) {}
87d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
88d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. MoltmannCXML_Parser::~CXML_Parser() {}
89d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
90d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool CXML_Parser::Init(const uint8_t* pBuffer, size_t size) {
91d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_pDataAcc = pdfium::MakeUnique<CXML_DataBufAcc>(pBuffer, size);
92d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nOffset = 0;
93d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return ReadNextBlock();
94d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
95d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
96d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool CXML_Parser::ReadNextBlock() {
97d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (!m_pDataAcc->ReadNextBlock())
98d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return false;
99d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
100d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_pBuffer = m_pDataAcc->GetBlockBuffer();
101d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_dwBufferSize = m_pDataAcc->GetBlockSize();
102d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nBufferOffset = 0;
103d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_dwIndex = 0;
104d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return m_dwBufferSize > 0;
105d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
106d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
107d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool CXML_Parser::IsEOF() {
108d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return m_pDataAcc->IsEOF() && m_dwIndex >= m_dwBufferSize;
109d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
110d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
111d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::SkipWhiteSpaces() {
112d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
113d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF())
114d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return;
115d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
116d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  do {
117d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    while (m_dwIndex < m_dwBufferSize &&
118d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann           g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) {
119d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      m_dwIndex++;
120d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
121d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
122d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (m_dwIndex < m_dwBufferSize || IsEOF())
123d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      break;
124d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  } while (ReadNextBlock());
125d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
126d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
127d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::GetName(ByteString* space, ByteString* name) {
128d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
129d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF())
130d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return;
131d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
132d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  std::ostringstream buf;
133d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  do {
134d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    while (m_dwIndex < m_dwBufferSize) {
135d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      uint8_t ch = m_pBuffer[m_dwIndex];
136d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (ch == ':') {
137d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        *space = ByteString(buf);
138d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        buf.str("");
139d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      } else if (g_FXCRT_XML_IsNameChar(ch)) {
140d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        buf << static_cast<char>(ch);
141d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      } else {
142d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
143d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      }
144d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      m_dwIndex++;
145d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
146d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
147d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (m_dwIndex < m_dwBufferSize || IsEOF())
148d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      break;
149d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  } while (ReadNextBlock());
150d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  *name = ByteString(buf);
151d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
152d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
153d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::SkipLiterals(const ByteStringView& str) {
154d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
155d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF()) {
156d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return;
157d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  }
158d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  int32_t i = 0, iLen = str.GetLength();
159d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  do {
160d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    while (m_dwIndex < m_dwBufferSize) {
161d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (str[i] != m_pBuffer[m_dwIndex++]) {
162d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        i = 0;
163d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        continue;
164d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      }
165d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      i++;
166d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (i == iLen)
167d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
168d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
169d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
170d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (i == iLen)
171d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      return;
172d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
173d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (m_dwIndex < m_dwBufferSize || IsEOF())
174d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      break;
175d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  } while (ReadNextBlock());
176d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  while (!m_pDataAcc->IsEOF()) {
177d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    ReadNextBlock();
178d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwBufferSize);
179d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  }
180d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_dwIndex = m_dwBufferSize;
181d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
182d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
183d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannuint32_t CXML_Parser::GetCharRef() {
184d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
185d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF())
186d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return 0;
187d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
188d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  uint8_t ch;
189d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  int32_t iState = 0;
190d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  std::ostringstream buf;
191d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  uint32_t code = 0;
192d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  do {
193d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    while (m_dwIndex < m_dwBufferSize) {
194d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      ch = m_pBuffer[m_dwIndex];
195d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      switch (iState) {
196d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 0:
197d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == '#') {
198d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            m_dwIndex++;
199d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 2;
200d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            break;
201d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
202d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          iState = 1;
203d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 1:
204d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          m_dwIndex++;
205d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == ';') {
206d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            std::string ref = buf.str();
207d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            if (ref == "gt")
208d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              code = '>';
209d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            else if (ref == "lt")
210d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              code = '<';
211d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            else if (ref == "amp")
212d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              code = '&';
213d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            else if (ref == "apos")
214d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              code = '\'';
215d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            else if (ref == "quot")
216d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              code = '"';
217d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 10;
218d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            break;
219d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
220d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          buf << static_cast<char>(ch);
221d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          break;
222d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 2:
223d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == 'x') {
224d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            m_dwIndex++;
225d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 4;
226d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            break;
227d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
228d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          iState = 3;
229d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 3:
230d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          m_dwIndex++;
231d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == ';') {
232d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 10;
233d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            break;
234d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
235d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (g_FXCRT_XML_IsDigital(ch))
236d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            code = code * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
237d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          break;
238d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 4:
239d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          m_dwIndex++;
240d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == ';') {
241d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 10;
242d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            break;
243d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
244d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          uint8_t nHex =
245d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar;
246d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (nHex) {
247d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) {
248d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              code = (code << 4) +
249d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                     FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
250d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) {
251d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              code = (code << 4) + ch - 87;
252d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            } else {
253d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              code = (code << 4) + ch - 55;
254d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            }
255d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
256d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          break;
257d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      }
258d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (iState == 10)
259d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
260d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
261d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
262d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
263d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      break;
264d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
265d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  } while (ReadNextBlock());
266d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return code;
267d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
268d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
269d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. MoltmannWideString CXML_Parser::GetAttrValue() {
270d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
271d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF())
272d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return WideString();
273d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
274d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  CFX_UTF8Decoder decoder;
275d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  uint8_t mark = 0;
276d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  uint8_t ch = 0;
277d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  do {
278d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    while (m_dwIndex < m_dwBufferSize) {
279d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      ch = m_pBuffer[m_dwIndex];
280d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (mark == 0) {
281d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        if (ch != '\'' && ch != '"')
282d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          return WideString();
283d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
284d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        mark = ch;
285d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        m_dwIndex++;
286d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        ch = 0;
287d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        continue;
288d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      }
289d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      m_dwIndex++;
290d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (ch == mark)
291d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
292d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
293d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (ch == '&') {
294d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        decoder.AppendCodePoint(GetCharRef());
295d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        if (IsEOF())
296d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          return WideString(decoder.GetResult());
297d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      } else {
298d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        decoder.Input(ch);
299d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      }
300d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
301d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
302d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF())
303d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      break;
304d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  } while (ReadNextBlock());
305d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return WideString(decoder.GetResult());
306d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
307d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
308d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::GetTagName(bool bStartTag,
309d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                             bool* bEndTag,
310d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                             ByteString* space,
311d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                             ByteString* name) {
312d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
313d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF())
314d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return;
315d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
316d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  *bEndTag = false;
317d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  uint8_t ch;
318d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  int32_t iState = bStartTag ? 1 : 0;
319d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  do {
320d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    while (m_dwIndex < m_dwBufferSize) {
321d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      ch = m_pBuffer[m_dwIndex];
322d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      switch (iState) {
323d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 0:
324d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          m_dwIndex++;
325d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch != '<')
326d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            break;
327d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
328d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          iState = 1;
329d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          break;
330d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 1:
331d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == '?') {
332d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            m_dwIndex++;
333d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipLiterals("?>");
334d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 0;
335d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            break;
336d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
337d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == '!') {
338d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            m_dwIndex++;
339d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipLiterals("-->");
340d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 0;
341d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            break;
342d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
343d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == '/') {
344d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            m_dwIndex++;
345d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            GetName(space, name);
346d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            *bEndTag = true;
347d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          } else {
348d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            GetName(space, name);
349d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            *bEndTag = false;
350d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
351d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          return;
352d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      }
353d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
354d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
355d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (m_dwIndex < m_dwBufferSize || IsEOF())
356d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      break;
357d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  } while (ReadNextBlock());
358d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
359d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
360d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannstd::unique_ptr<CXML_Element> CXML_Parser::ParseElement(CXML_Element* pParent,
361d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                                                        bool bStartTag) {
362d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return ParseElementInternal(pParent, bStartTag, 0);
363d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
364d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
365d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannstd::unique_ptr<CXML_Element> CXML_Parser::ParseElementInternal(
366d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    CXML_Element* pParent,
367d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    bool bStartTag,
368d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    int nDepth) {
369d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (nDepth > kMaxDepth)
370d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return nullptr;
371d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
372d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
373d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF())
374d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return nullptr;
375d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
376d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  ByteString tag_name;
377d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  ByteString tag_space;
378d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  bool bEndTag;
379d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  GetTagName(bStartTag, &bEndTag, &tag_space, &tag_name);
380d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (tag_name.IsEmpty() || bEndTag)
381d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return nullptr;
382d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
383d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  auto pElement = pdfium::MakeUnique<CXML_Element>(
384d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      pParent, tag_space.AsStringView(), tag_name.AsStringView());
385d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  do {
386d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    ByteString attr_space;
387d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    ByteString attr_name;
388d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    while (m_dwIndex < m_dwBufferSize) {
389d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      SkipWhiteSpaces();
390d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (IsEOF())
391d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
392d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
393d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex]))
394d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
395d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
396d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      GetName(&attr_space, &attr_name);
397d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      SkipWhiteSpaces();
398d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (IsEOF())
399d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
400d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
401d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (m_pBuffer[m_dwIndex] != '=')
402d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
403d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
404d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      m_dwIndex++;
405d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      SkipWhiteSpaces();
406d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (IsEOF())
407d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
408d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
409d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      WideString attr_value = GetAttrValue();
410d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      pElement->SetAttribute(attr_space, attr_name, attr_value);
411d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
412d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
413d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (m_dwIndex < m_dwBufferSize || IsEOF())
414d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      break;
415d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  } while (ReadNextBlock());
416d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  SkipWhiteSpaces();
417d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF())
418d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return pElement;
419d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
420d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  uint8_t ch = m_pBuffer[m_dwIndex++];
421d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (ch == '/') {
422d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_dwIndex++;
423d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
424d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return pElement;
425d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  }
426d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (ch != '>') {
427d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
428d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return nullptr;
429d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  }
430d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  SkipWhiteSpaces();
431d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (IsEOF())
432d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return pElement;
433d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
434d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  CFX_UTF8Decoder decoder;
435d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  CFX_WideTextBuf content;
436d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  bool bCDATA = false;
437d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  int32_t iState = 0;
438d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  do {
439d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    while (m_dwIndex < m_dwBufferSize) {
440d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      ch = m_pBuffer[m_dwIndex++];
441d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      switch (iState) {
442d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 0:
443d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == '<') {
444d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 1;
445d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          } else if (ch == '&') {
446d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            decoder.ClearStatus();
447d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            decoder.AppendCodePoint(GetCharRef());
448d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          } else {
449d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            decoder.Input(ch);
450d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
451d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          break;
452d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 1:
453d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == '!') {
454d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 2;
455d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          } else if (ch == '?') {
456d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipLiterals("?>");
457d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipWhiteSpaces();
458d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 0;
459d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          } else if (ch == '/') {
460d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            ByteString space;
461d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            ByteString name;
462d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            GetName(&space, &name);
463d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipWhiteSpaces();
464d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            m_dwIndex++;
465d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 10;
466d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          } else {
467d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            content << decoder.GetResult();
468d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            WideString dataStr = content.MakeString();
469d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            if (!bCDATA)
470d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              dataStr.TrimRight(L" \t\r\n");
471d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
472d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            InsertContentSegment(bCDATA, dataStr.AsStringView(),
473d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                                 pElement.get());
474d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            content.Clear();
475d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            decoder.Clear();
476d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            bCDATA = false;
477d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            iState = 0;
478d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            m_dwIndex--;
479d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            std::unique_ptr<CXML_Element> pSubElement =
480d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                ParseElementInternal(pElement.get(), true, nDepth + 1);
481d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            if (!pSubElement)
482d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann              break;
483d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
484d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            pElement->AppendChild(std::move(pSubElement));
485d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipWhiteSpaces();
486d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
487d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          break;
488d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        case 2:
489d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          if (ch == '[') {
490d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipLiterals("]]>");
491d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          } else if (ch == '-') {
492d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            m_dwIndex++;
493d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipLiterals("-->");
494d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          } else {
495d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann            SkipLiterals(">");
496d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          }
497d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          decoder.Clear();
498d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          SkipWhiteSpaces();
499d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          iState = 0;
500d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann          break;
501d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      }
502d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      if (iState == 10) {
503d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann        break;
504d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      }
505d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    }
506d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
507d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF())
508d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann      break;
509d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  } while (ReadNextBlock());
510d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  content << decoder.GetResult();
511d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  WideString dataStr = content.MakeString();
512d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  dataStr.TrimRight(L" \t\r\n");
513d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
514d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  InsertContentSegment(bCDATA, dataStr.AsStringView(), pElement.get());
515d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  content.Clear();
516d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  decoder.Clear();
517d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  bCDATA = false;
518d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  return pElement;
519d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
520d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
521d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::InsertContentSegment(bool bCDATA,
522d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                                       const WideStringView& content,
523d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann                                       CXML_Element* pElement) {
524d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  if (content.IsEmpty())
525d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann    return;
526d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann
527d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann  pElement->AppendChild(pdfium::MakeUnique<CXML_Content>(bCDATA, content));
528d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}
529