1d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann// Copyright 2014 PDFium Authors. All rights reserved. 2d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann// Use of this source code is governed by a BSD-style license that can be 3d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann// found in the LICENSE file. 4d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 5d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 7d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <algorithm> 8d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <memory> 9d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <sstream> 10d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <string> 11d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <utility> 12d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include <vector> 13d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 14d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/cfx_utf8decoder.h" 15d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/cfx_widetextbuf.h" 16d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/fx_extension.h" 17d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/xml/cxml_content.h" 18d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/xml/cxml_element.h" 19d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "core/fxcrt/xml/cxml_parser.h" 20d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "third_party/base/ptr_util.h" 21d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#include "third_party/base/stl_util.h" 22d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann 23d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannnamespace { 24d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 25d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_Normal 0x00 26d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_SpaceChar 0x01 27d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_Letter 0x02 28d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_Digital 0x04 29d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_NameIntro 0x08 30d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_NameChar 0x10 31d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_HexDigital 0x20 32d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40 33d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60 34d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann#define FXCRTM_XML_CHARTYPE_HexChar 0x60 35d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 36d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannconst uint8_t g_FXCRT_XML_ByteTypes[256] = { 37d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 38d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 39d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 40d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, 41d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, 42d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, 43d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 44d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18, 45d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 46d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 47d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A, 48d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 49d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 50d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 51d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 52d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 53d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 54d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 55d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 56d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 57d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 58d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 0x1A, 0x1A, 0x01, 0x01, 59d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann}; 60d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 61d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannconstexpr int kMaxDepth = 1024; 62d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 63d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) { 64d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar); 65d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 66d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 67d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool g_FXCRT_XML_IsDigital(uint8_t ch) { 68d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital); 69d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 70d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 71d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool g_FXCRT_XML_IsNameIntro(uint8_t ch) { 72d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro); 73d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 74d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 75d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool g_FXCRT_XML_IsNameChar(uint8_t ch) { 76d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar); 77d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 78d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann 79d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} // namespace 80d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 81d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. MoltmannCXML_Parser::CXML_Parser() 82d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann : m_nOffset(0), 83d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_pBuffer(nullptr), 84d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwBufferSize(0), 85d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nBufferOffset(0), 86d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex(0) {} 87d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 88d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. MoltmannCXML_Parser::~CXML_Parser() {} 89d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 90d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool CXML_Parser::Init(const uint8_t* pBuffer, size_t size) { 91d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_pDataAcc = pdfium::MakeUnique<CXML_DataBufAcc>(pBuffer, size); 92d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = 0; 93d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return ReadNextBlock(); 94d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 95d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 96d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool CXML_Parser::ReadNextBlock() { 97d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (!m_pDataAcc->ReadNextBlock()) 98d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return false; 99d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 100d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_pBuffer = m_pDataAcc->GetBlockBuffer(); 101d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwBufferSize = m_pDataAcc->GetBlockSize(); 102d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann m_nBufferOffset = 0; 103d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex = 0; 104d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return m_dwBufferSize > 0; 105d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 106d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 107d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannbool CXML_Parser::IsEOF() { 108d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return m_pDataAcc->IsEOF() && m_dwIndex >= m_dwBufferSize; 109d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 110d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 111d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::SkipWhiteSpaces() { 112d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 113d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 114d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return; 115d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 116d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann do { 117d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (m_dwIndex < m_dwBufferSize && 118d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) { 119d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 120d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 121d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 122d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (m_dwIndex < m_dwBufferSize || IsEOF()) 123d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 124d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } while (ReadNextBlock()); 125d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann} 126d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 127d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::GetName(ByteString* space, ByteString* name) { 128d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 129d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 130d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return; 131d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 132d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann std::ostringstream buf; 133d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann do { 134d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (m_dwIndex < m_dwBufferSize) { 135d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann uint8_t ch = m_pBuffer[m_dwIndex]; 136d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == ':') { 137d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann *space = ByteString(buf); 138d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann buf.str(""); 139d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else if (g_FXCRT_XML_IsNameChar(ch)) { 140d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann buf << static_cast<char>(ch); 141d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else { 142d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 143d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 144d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 145d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 146d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 147d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (m_dwIndex < m_dwBufferSize || IsEOF()) 148d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann break; 149d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } while (ReadNextBlock()); 150d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann *name = ByteString(buf); 151d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 152d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 153d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::SkipLiterals(const ByteStringView& str) { 154d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 155d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) { 156d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return; 157d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 158d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann int32_t i = 0, iLen = str.GetLength(); 159d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann do { 160d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (m_dwIndex < m_dwBufferSize) { 161d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (str[i] != m_pBuffer[m_dwIndex++]) { 162d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann i = 0; 163d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann continue; 164d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 165d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann i++; 166d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (i == iLen) 167d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 168d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 169d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 170d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (i == iLen) 171d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return; 172d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann 173d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (m_dwIndex < m_dwBufferSize || IsEOF()) 174d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 175d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } while (ReadNextBlock()); 176d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (!m_pDataAcc->IsEOF()) { 177d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ReadNextBlock(); 178d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwBufferSize); 179d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 180d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex = m_dwBufferSize; 181d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 182d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 183d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannuint32_t CXML_Parser::GetCharRef() { 184d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 185d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 186d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return 0; 187d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 188d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann uint8_t ch; 189d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann int32_t iState = 0; 190d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann std::ostringstream buf; 191d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann uint32_t code = 0; 192d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann do { 193d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (m_dwIndex < m_dwBufferSize) { 194d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ch = m_pBuffer[m_dwIndex]; 195d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann switch (iState) { 196d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 0: 197d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == '#') { 198d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 199d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 2; 200d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 201d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 202d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 1; 203d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 1: 204d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 205d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == ';') { 206d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann std::string ref = buf.str(); 207d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ref == "gt") 208d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = '>'; 209d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann else if (ref == "lt") 210d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = '<'; 211d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann else if (ref == "amp") 212d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = '&'; 213d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann else if (ref == "apos") 214d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = '\''; 215d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann else if (ref == "quot") 216d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = '"'; 217d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 10; 218d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 219d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 220d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann buf << static_cast<char>(ch); 221d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann break; 222d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 2: 223d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == 'x') { 224d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 225d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 4; 226d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 227d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 228d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 3; 229d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 3: 230d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 231d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == ';') { 232d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 10; 233d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 234d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 235d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (g_FXCRT_XML_IsDigital(ch)) 236d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = code * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); 237d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 238d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 4: 239d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 240d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == ';') { 241d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 10; 242d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 243d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 244d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann uint8_t nHex = 245d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar; 246d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann if (nHex) { 247d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) { 248d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = (code << 4) + 249d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); 250d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) { 251d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = (code << 4) + ch - 87; 252d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else { 253d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann code = (code << 4) + ch - 55; 254d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 255d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 256d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 257d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 258d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (iState == 10) 259d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 260d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 261d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 262d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) { 263d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 264d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 265d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } while (ReadNextBlock()); 266d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return code; 267d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 268d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 269d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
MoltmannWideString CXML_Parser::GetAttrValue() { 270d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 271d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 272d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return WideString(); 273d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 274d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann CFX_UTF8Decoder decoder; 275d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann uint8_t mark = 0; 276d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann uint8_t ch = 0; 277d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann do { 278d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (m_dwIndex < m_dwBufferSize) { 279d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ch = m_pBuffer[m_dwIndex]; 280d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (mark == 0) { 281d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch != '\'' && ch != '"') 282d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return WideString(); 283d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 284d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann mark = ch; 285d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 286d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ch = 0; 287d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann continue; 288d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 289d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 290d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == mark) 291d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 292d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 293d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == '&') { 294d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann decoder.AppendCodePoint(GetCharRef()); 295d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 296d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return WideString(decoder.GetResult()); 297d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else { 298d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann decoder.Input(ch); 299d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 300d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 301d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 302d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) 303d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 304d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } while (ReadNextBlock()); 305d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return WideString(decoder.GetResult()); 306d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 307d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 308d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::GetTagName(bool bStartTag, 309d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann bool* bEndTag, 310d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ByteString* space, 311d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ByteString* name) { 312d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 313d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 314d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return; 315d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 316d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann *bEndTag = false; 317d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann uint8_t ch; 318d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann int32_t iState = bStartTag ? 1 : 0; 319d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann do { 320d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (m_dwIndex < m_dwBufferSize) { 321d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ch = m_pBuffer[m_dwIndex]; 322d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann switch (iState) { 323d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 0: 324d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 325d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch != '<') 326d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 327d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 328d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 1; 329d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 330d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 1: 331d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == '?') { 332d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 333d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipLiterals("?>"); 334d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 0; 335d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 336d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 337d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == '!') { 338d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 339d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipLiterals("-->"); 340d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 0; 341d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 342d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 343d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann if (ch == '/') { 344d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 345d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann GetName(space, name); 346d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann *bEndTag = true; 347d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else { 348d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann GetName(space, name); 349d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann *bEndTag = false; 350d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 351d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return; 352d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 353d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 354d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 355d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (m_dwIndex < m_dwBufferSize || IsEOF()) 356d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 357d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } while (ReadNextBlock()); 358d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 359d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann /* Parses one element starting at the current read position. Forwards |pParent| and |bStartTag| to ParseElementInternal() with an initial nesting depth of 0 and returns its result. */ 360d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannstd::unique_ptr<CXML_Element> CXML_Parser::ParseElement(CXML_Element* pParent, 361d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann bool bStartTag) { 362d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return ParseElementInternal(pParent, bStartTag, 0); 363d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 364d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann /* Parses one element together with its attributes, text content and, recursively, its child elements. |pParent| is handed to the CXML_Element constructor, |bStartTag| is forwarded to GetTagName(), and |nDepth| is the current recursion depth. Returns nullptr when |nDepth| exceeds kMaxDepth, when IsEOF() is already true, when no tag name is read, when the tag is an end tag, or when the byte after the attribute list is neither '/' nor '>'. Otherwise returns the new element, which may be only partially populated if IsEOF() becomes true part-way through. */ 365d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannstd::unique_ptr<CXML_Element> CXML_Parser::ParseElementInternal( 366d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann CXML_Element* pParent, 367d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann bool bStartTag, 368d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann int nDepth) { /* Refuse to recurse deeper than kMaxDepth. */ 369d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (nDepth > kMaxDepth) 370d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return nullptr; 371d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 372d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 373d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 374d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return nullptr; 375d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 376d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ByteString tag_name; 377d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ByteString tag_space; 378d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann bool bEndTag; 379d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann GetTagName(bStartTag, &bEndTag, &tag_space, &tag_name); 380d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (tag_name.IsEmpty() || bEndTag) 381d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return nullptr; 382d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 383d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann auto pElement = pdfium::MakeUnique<CXML_Element>( 384d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann pParent, tag_space.AsStringView(), tag_name.AsStringView()); /* Attribute loop: read "name = value" pairs until a byte that is not a name-intro character (or a missing '=') stops it. The outer do/while calls ReadNextBlock() when the current buffer is used up before IsEOF(); presumably that loads more input into m_pBuffer - confirm against its definition. */ 385d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann do { 386d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ByteString attr_space; 387d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ByteString attr_name; 388d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (m_dwIndex < m_dwBufferSize) { 389d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann SkipWhiteSpaces(); 390d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 391d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 392d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 393d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) 394d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 395d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 396d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann GetName(&attr_space, &attr_name); 397d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipWhiteSpaces(); 398d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 399d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 400d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 401d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (m_pBuffer[m_dwIndex] != '=') 402d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 403d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 404d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 405d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipWhiteSpaces(); 406d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 407d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 408d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 409d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann WideString attr_value = GetAttrValue(); 410d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann pElement->SetAttribute(attr_space, attr_name, attr_value); 411d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 412d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 413d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann if (m_dwIndex < m_dwBufferSize || IsEOF()) 414d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 415d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } while (ReadNextBlock()); 416d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipWhiteSpaces(); 417d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 418d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return pElement; 419d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann /* The byte after the attribute list decides the tag form: '/' ends the element immediately, '>' opens its content, anything else is an error. */ 420d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann uint8_t ch = m_pBuffer[m_dwIndex++]; 421d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == '/') { /* Advance one more byte without inspecting it; presumably this is the '>' of a self-closing tag - NOTE(review): nothing here checks that. */ 422d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 423d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 424d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return pElement; 425d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 426d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch != '>') { 427d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 428d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return nullptr; 429d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 430d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipWhiteSpaces(); 431d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (IsEOF()) 432d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return pElement; 433d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 434d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann CFX_UTF8Decoder decoder; 435d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann CFX_WideTextBuf content; /* NOTE(review): bCDATA is initialized to false and only ever reassigned false in this function, so the "!bCDATA" trim below always runs. */ 436d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann bool bCDATA = false; /* Content state machine driven by iState. 0: character data ('<' switches to 1, '&' appends the code point from GetCharRef(), any other byte goes to the UTF-8 decoder). 1: byte after '<' ('!' switches to 2, '?' calls SkipLiterals("?>"), '/' reads a name via GetName() and switches to 10, anything else flushes pending text and parses a child element recursively). 2: byte after "<!" (calls SkipLiterals with "]]>", "-->" or ">" depending on the byte, then returns to 0). 10: end tag consumed, leave both loops. */ 437d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann int32_t iState = 0; 438d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann do { 439d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann while (m_dwIndex < m_dwBufferSize) { 440d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ch = m_pBuffer[m_dwIndex++]; 441d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann switch (iState) { 442d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 0: 443d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == '<') { 444d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 1; 445d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else if (ch == '&') { 446d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann decoder.ClearStatus(); 447d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann decoder.AppendCodePoint(GetCharRef()); 448d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else { 449d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann decoder.Input(ch); 450d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 451d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 452d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 1: 453d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == '!') { 454d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 2; 455d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else if (ch == '?') { 456d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipLiterals("?>"); 457d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipWhiteSpaces(); 458d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 0; 459d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else if (ch == '/') { 460d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ByteString space; 461d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ByteString name; 462d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann GetName(&space, &name); 463d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipWhiteSpaces(); 464d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 465d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 10; 466d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else { /* Any other byte begins a child tag: flush the text collected so far as a content segment before recursing. */ 467d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann content << decoder.GetResult(); 468d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann WideString dataStr = content.MakeString(); 469d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (!bCDATA) 470d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann dataStr.TrimRight(L" \t\r\n"); 471d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 472d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann InsertContentSegment(bCDATA, dataStr.AsStringView(), 473d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann pElement.get()); 474d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann content.Clear(); 475d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann decoder.Clear(); 476d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann bCDATA = false; 477d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 0; /* Step back so the byte just read is seen again by the recursive call. */ 478d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex--; 479d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann std::unique_ptr<CXML_Element> pSubElement = 480d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann ParseElementInternal(pElement.get(), true, nDepth + 1); /* NOTE: this break leaves only the switch; the enclosing while loop keeps running with iState == 0. */ 481d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (!pSubElement) 482d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 483d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 484d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann pElement->AppendChild(std::move(pSubElement)); 485d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann SkipWhiteSpaces(); 486d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 487d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 488d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann case 2: 489d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (ch == '[') { 490d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipLiterals("]]>"); 491d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else if (ch == '-') { 492d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_dwIndex++; 493d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipLiterals("-->"); 494d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } else { 495d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipLiterals(">"); 496d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 497d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann decoder.Clear(); 498d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann SkipWhiteSpaces(); 499d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann iState = 0; 500d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 501d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 502d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (iState == 10) { 503d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 504d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 505d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } 506d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 507d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) 508d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann break; 509d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann } while (ReadNextBlock()); /* Flush whatever text is still buffered once the loops have ended. */ 510d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. 
Moltmann content << decoder.GetResult(); 511d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann WideString dataStr = content.MakeString(); 512d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann dataStr.TrimRight(L" \t\r\n"); 513d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 514d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann InsertContentSegment(bCDATA, dataStr.AsStringView(), pElement.get()); /* NOTE(review): these resets touch locals that go out of scope at the return just below. */ 515d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann content.Clear(); 516d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann decoder.Clear(); 517d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann bCDATA = false; 518d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return pElement; 519d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 520d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann /* Appends |content| to |pElement| as a new CXML_Content child created with the |bCDATA| flag. Does nothing when |content| is empty. */ 521d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmannvoid CXML_Parser::InsertContentSegment(bool bCDATA, 522d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann const WideStringView& content, 523d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann CXML_Element* pElement) { 524d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann if (content.IsEmpty()) 525d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann return; 526d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann 527d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann pElement->AppendChild(pdfium::MakeUnique<CXML_Content>(bCDATA, content)); 528d904c1ec7e8d1d86ed56f0dd252435d12cd345aePhilip P. Moltmann} 529