cxfa_simple_parser.cpp revision 5ae9d0c6fd838a2967cca72aa5751b51dadc2769
1// Copyright 2016 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#include "xfa/fxfa/parser/cxfa_simple_parser.h" 8 9#include <utility> 10 11#include "core/fxcrt/fx_ext.h" 12#include "third_party/base/ptr_util.h" 13#include "xfa/fgas/crt/fgas_codepage.h" 14#include "xfa/fxfa/fxfa.h" 15#include "xfa/fxfa/parser/cxfa_document.h" 16#include "xfa/fxfa/parser/cxfa_widetextread.h" 17#include "xfa/fxfa/parser/cxfa_xml_parser.h" 18#include "xfa/fxfa/parser/xfa_basic_data.h" 19#include "xfa/fxfa/parser/xfa_utils.h" 20#include "xfa/fxfa/xfa_checksum.h" 21 22namespace { 23 24CFDE_XMLNode* GetDocumentNode(CFDE_XMLDoc* pXMLDoc, 25 bool bVerifyWellFormness = false) { 26 if (!pXMLDoc) 27 return nullptr; 28 29 for (CFDE_XMLNode* pXMLNode = 30 pXMLDoc->GetRoot()->GetNodeItem(CFDE_XMLNode::FirstChild); 31 pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { 32 if (pXMLNode->GetType() != FDE_XMLNODE_Element) 33 continue; 34 35 if (!bVerifyWellFormness) 36 return pXMLNode; 37 38 for (CFDE_XMLNode* pNextNode = 39 pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling); 40 pNextNode; 41 pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { 42 if (pNextNode->GetType() == FDE_XMLNODE_Element) 43 return nullptr; 44 } 45 return pXMLNode; 46 } 47 return nullptr; 48} 49 50void GetElementTagNamespaceURI(CFDE_XMLElement* pElement, 51 CFX_WideString& wsNamespaceURI) { 52 CFX_WideString wsNodeStr; 53 pElement->GetNamespacePrefix(wsNodeStr); 54 if (!XFA_FDEExtension_ResolveNamespaceQualifier( 55 pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) { 56 wsNamespaceURI.clear(); 57 } 58} 59 60bool MatchNodeName(CFDE_XMLNode* pNode, 61 const CFX_WideStringC& wsLocalTagName, 62 const CFX_WideStringC& wsNamespaceURIPrefix, 63 uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) { 64 if (!pNode || pNode->GetType() != FDE_XMLNODE_Element) 65 return false; 66 67 CFDE_XMLElement* pElement = reinterpret_cast<CFDE_XMLElement*>(pNode); 68 CFX_WideString wsNodeStr; 69 pElement->GetLocalTagName(wsNodeStr); 70 if (wsNodeStr != wsLocalTagName) 71 return false; 72 73 GetElementTagNamespaceURI(pElement, wsNodeStr); 74 if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH) 75 return true; 76 if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) { 77 return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) == 78 wsNamespaceURIPrefix; 79 } 80 return wsNodeStr == wsNamespaceURIPrefix; 81} 82 83bool GetAttributeLocalName(const CFX_WideStringC& wsAttributeName, 84 CFX_WideString& wsLocalAttrName) { 85 CFX_WideString wsAttrName(wsAttributeName); 86 FX_STRSIZE iFind = wsAttrName.Find(L':', 0); 87 if (iFind < 0) { 88 wsLocalAttrName = wsAttrName; 89 return false; 90 } 91 wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1); 92 return true; 93} 94 95bool ResolveAttribute(CFDE_XMLElement* pElement, 96 const CFX_WideStringC& wsAttributeName, 97 CFX_WideString& wsLocalAttrName, 98 CFX_WideString& wsNamespaceURI) { 99 CFX_WideString wsAttrName(wsAttributeName); 100 CFX_WideString wsNSPrefix; 101 if (GetAttributeLocalName(wsAttributeName, wsLocalAttrName)) { 102 wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() - 103 wsLocalAttrName.GetLength() - 1); 104 } 105 if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" || 106 wsNSPrefix == L"xml") { 107 return false; 108 } 109 if (!XFA_FDEExtension_ResolveNamespaceQualifier( 110 pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) { 111 wsNamespaceURI.clear(); 112 return false; 113 } 114 return true; 115} 116 117bool FindAttributeWithNS(CFDE_XMLElement* pElement, 118 const CFX_WideStringC& wsLocalAttributeName, 119 const CFX_WideStringC& wsNamespaceURIPrefix, 120 CFX_WideString& wsValue, 121 bool bMatchNSAsPrefix = false) { 122 if (!pElement) 123 return false; 124 125 CFX_WideString wsAttrName; 126 CFX_WideString wsAttrValue; 127 CFX_WideString wsAttrNS; 128 for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount; 129 i++) { 130 pElement->GetAttribute(i, wsAttrName, wsAttrValue); 131 FX_STRSIZE iFind = wsAttrName.Find(L':', 0); 132 CFX_WideString wsNSPrefix; 133 if (iFind < 0) { 134 if (wsLocalAttributeName != wsAttrName) 135 continue; 136 } else { 137 if (wsLocalAttributeName != 138 wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) { 139 continue; 140 } 141 wsNSPrefix = wsAttrName.Left(iFind); 142 } 143 if (!XFA_FDEExtension_ResolveNamespaceQualifier( 144 pElement, wsNSPrefix.AsStringC(), wsAttrNS)) { 145 continue; 146 } 147 if (bMatchNSAsPrefix) { 148 if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) != 149 wsNamespaceURIPrefix) { 150 continue; 151 } 152 } else { 153 if (wsAttrNS != wsNamespaceURIPrefix) 154 continue; 155 } 156 wsValue = wsAttrValue; 157 return true; 158 } 159 return false; 160} 161 162CFDE_XMLNode* GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) { 163 if (MatchNodeName(pXMLDocumentNode, 164 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName, 165 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, 166 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { 167 return pXMLDocumentNode; 168 } 169 if (!MatchNodeName(pXMLDocumentNode, 170 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName, 171 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI, 172 XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) { 173 return nullptr; 174 } 175 for (CFDE_XMLNode* pDatasetsNode = 176 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); 177 pDatasetsNode; 178 pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { 179 if (!MatchNodeName(pDatasetsNode, 180 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName, 181 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, 182 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { 183 continue; 184 } 185 return pDatasetsNode; 186 } 187 return nullptr; 188} 189 190bool IsStringAllWhitespace(CFX_WideString wsText) { 191 wsText.TrimRight(L"\x20\x9\xD\xA"); 192 return wsText.IsEmpty(); 193} 194 195void ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode, 196 CFX_WideString& wsOutput) { 197 for (CFDE_XMLNode* pXMLChild = 198 pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); 199 pXMLChild; 200 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { 201 switch (pXMLChild->GetType()) { 202 case FDE_XMLNODE_Element: { 203 CFX_WideString wsTextData; 204 static_cast<CFDE_XMLElement*>(pXMLChild)->GetTextData(wsTextData); 205 wsTextData += L"\n"; 206 wsOutput += wsTextData; 207 break; 208 } 209 case FDE_XMLNODE_Text: { 210 CFX_WideString wsText; 211 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText); 212 if (IsStringAllWhitespace(wsText)) 213 continue; 214 215 wsOutput = wsText; 216 break; 217 } 218 case FDE_XMLNODE_CharData: { 219 CFX_WideString wsCharData; 220 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsCharData); 221 if (IsStringAllWhitespace(wsCharData)) 222 continue; 223 224 wsOutput = wsCharData; 225 break; 226 } 227 default: 228 ASSERT(false); 229 break; 230 } 231 } 232} 233 234const XFA_PACKETINFO* GetPacketByName(const CFX_WideStringC& wsName) { 235 if (wsName.IsEmpty()) 236 return nullptr; 237 238 uint32_t uHash = FX_HashCode_GetW(wsName, false); 239 int32_t iStart = 0; 240 int32_t iEnd = g_iXFAPacketCount - 1; 241 do { 242 int32_t iMid = (iStart + iEnd) / 2; 243 const XFA_PACKETINFO* pInfo = g_XFAPacketData + iMid; 244 if (uHash == pInfo->uHash) 245 return pInfo; 246 if (uHash < pInfo->uHash) 247 iEnd = iMid - 1; 248 else 249 iStart = iMid + 1; 250 } while (iStart <= iEnd); 251 return nullptr; 252} 253 254} // namespace 255 256bool XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) { 257 if (pRichTextXMLNode) { 258 CFX_WideString wsNamespaceURI; 259 GetElementTagNamespaceURI(pRichTextXMLNode, wsNamespaceURI); 260 if (wsNamespaceURI == L"http://www.w3.org/1999/xhtml") 261 return true; 262 } 263 return false; 264} 265 266CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory, 267 bool bDocumentParser) 268 : m_pXMLParser(nullptr), 269 m_pXMLDoc(nullptr), 270 m_pStream(nullptr), 271 m_pFileRead(nullptr), 272 m_pFactory(pFactory), 273 m_pRootNode(nullptr), 274 m_ePacketID(XFA_XDPPACKET_UNKNOWN), 275 m_bDocumentParser(bDocumentParser) {} 276 277CXFA_SimpleParser::~CXFA_SimpleParser() {} 278 279void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) { 280 m_pFactory = pFactory; 281} 282 283int32_t CXFA_SimpleParser::StartParse( 284 const CFX_RetainPtr<IFX_SeekableReadStream>& pStream, 285 XFA_XDPPACKET ePacketID) { 286 CloseParser(); 287 m_pFileRead = pStream; 288 m_pStream = IFGAS_Stream::CreateStream( 289 pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text); 290 if (!m_pStream) 291 return XFA_PARSESTATUS_StreamErr; 292 293 uint16_t wCodePage = m_pStream->GetCodePage(); 294 if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE && 295 wCodePage != FX_CODEPAGE_UTF8) { 296 m_pStream->SetCodePage(FX_CODEPAGE_UTF8); 297 } 298 m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>(); 299 auto pNewParser = 300 pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream); 301 m_pXMLParser = pNewParser.get(); 302 if (!m_pXMLDoc->LoadXML(std::move(pNewParser))) 303 return XFA_PARSESTATUS_StatusErr; 304 305 m_ePacketID = ePacketID; 306 return XFA_PARSESTATUS_Ready; 307} 308 309int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) { 310 if (!m_pXMLDoc || m_ePacketID == XFA_XDPPACKET_UNKNOWN) 311 return XFA_PARSESTATUS_StatusErr; 312 313 int32_t iRet = m_pXMLDoc->DoLoad(pPause); 314 if (iRet < 0) 315 return XFA_PARSESTATUS_SyntaxErr; 316 if (iRet < 100) 317 return iRet / 2; 318 319 m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID); 320 m_pXMLDoc->CloseXML(); 321 m_pStream.Reset(); 322 if (!m_pRootNode) 323 return XFA_PARSESTATUS_StatusErr; 324 325 return XFA_PARSESTATUS_Done; 326} 327 328int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML, 329 CFDE_XMLNode*& pXMLNode, 330 IFX_Pause* pPause) { 331 CloseParser(); 332 pXMLNode = nullptr; 333 m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>(); 334 auto pStream = pdfium::MakeRetain<CXFA_WideTextRead>(wsXML); 335 auto pParser = 336 pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), pStream); 337 pParser->m_dwCheckStatus = 0x03; 338 if (!m_pXMLDoc->LoadXML(std::move(pParser))) 339 return XFA_PARSESTATUS_StatusErr; 340 341 int32_t iRet = m_pXMLDoc->DoLoad(pPause); 342 if (iRet < 0 || iRet >= 100) 343 m_pXMLDoc->CloseXML(); 344 if (iRet < 0) 345 return XFA_PARSESTATUS_SyntaxErr; 346 if (iRet < 100) 347 return iRet / 2; 348 349 pXMLNode = GetDocumentNode(m_pXMLDoc.get()); 350 return XFA_PARSESTATUS_Done; 351} 352 353void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode, 354 CFDE_XMLNode* pXMLNode) { 355 XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID(); 356 if (ePacketID == XFA_XDPPACKET_Datasets) { 357 if (pXFANode->GetElementType() == XFA_Element::DataValue) { 358 for (CFDE_XMLNode* pXMLChild = 359 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); 360 pXMLChild; 361 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { 362 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); 363 if (eNodeType == FDE_XMLNODE_Instruction) 364 continue; 365 366 if (eNodeType == FDE_XMLNODE_Element) { 367 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, 368 XFA_Element::DataValue); 369 if (!pXFAChild) 370 return; 371 372 CFX_WideString wsNodeStr; 373 CFDE_XMLElement* child = static_cast<CFDE_XMLElement*>(pXMLChild); 374 child->GetLocalTagName(wsNodeStr); 375 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr); 376 CFX_WideString wsChildValue; 377 XFA_GetPlainTextFromRichText(child, wsChildValue); 378 if (!wsChildValue.IsEmpty()) 379 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue); 380 381 pXFANode->InsertChild(pXFAChild); 382 pXFAChild->SetXMLMappingNode(pXMLChild); 383 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 384 break; 385 } 386 } 387 m_pRootNode = pXFANode; 388 } else { 389 m_pRootNode = DataLoader(pXFANode, pXMLNode, true); 390 } 391 } else if (pXFANode->IsContentNode()) { 392 ParseContentNode(pXFANode, pXMLNode, ePacketID); 393 m_pRootNode = pXFANode; 394 } else { 395 m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true); 396 } 397} 398 399CXFA_Node* CXFA_SimpleParser::GetRootNode() const { 400 return m_pRootNode; 401} 402 403CFDE_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const { 404 return m_pXMLDoc.get(); 405} 406 407bool XFA_FDEExtension_ResolveNamespaceQualifier( 408 CFDE_XMLElement* pNode, 409 const CFX_WideStringC& wsQualifier, 410 CFX_WideString& wsNamespaceURI) { 411 if (!pNode) 412 return false; 413 414 CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root); 415 CFX_WideString wsNSAttribute; 416 bool bRet = false; 417 if (wsQualifier.IsEmpty()) { 418 wsNSAttribute = L"xmlns"; 419 bRet = true; 420 } else { 421 wsNSAttribute = L"xmlns:" + wsQualifier; 422 } 423 for (; pNode != pFakeRoot; pNode = static_cast<CFDE_XMLElement*>( 424 pNode->GetNodeItem(CFDE_XMLNode::Parent))) { 425 if (pNode->GetType() != FDE_XMLNODE_Element) 426 continue; 427 428 if (pNode->HasAttribute(wsNSAttribute.c_str())) { 429 pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI); 430 return true; 431 } 432 } 433 wsNamespaceURI.clear(); 434 return bRet; 435} 436 437CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode, 438 XFA_XDPPACKET ePacketID) { 439 switch (ePacketID) { 440 case XFA_XDPPACKET_UNKNOWN: 441 return nullptr; 442 case XFA_XDPPACKET_XDP: 443 return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID); 444 case XFA_XDPPACKET_Config: 445 return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID); 446 case XFA_XDPPACKET_Template: 447 case XFA_XDPPACKET_Form: 448 return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID); 449 case XFA_XDPPACKET_Datasets: 450 return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID); 451 case XFA_XDPPACKET_Xdc: 452 return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID); 453 case XFA_XDPPACKET_LocaleSet: 454 case XFA_XDPPACKET_ConnectionSet: 455 case XFA_XDPPACKET_SourceSet: 456 return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode, 457 ePacketID); 458 default: 459 return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID); 460 } 461} 462 463CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP( 464 CFDE_XMLNode* pXMLDocumentNode, 465 XFA_XDPPACKET ePacketID) { 466 if (!MatchNodeName(pXMLDocumentNode, 467 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName, 468 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI, 469 XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) { 470 return nullptr; 471 } 472 CXFA_Node* pXFARootNode = 473 m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Xfa); 474 if (!pXFARootNode) 475 return nullptr; 476 477 m_pRootNode = pXFARootNode; 478 pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa"); 479 { 480 CFDE_XMLElement* pElement = static_cast<CFDE_XMLElement*>(pXMLDocumentNode); 481 int32_t iAttributeCount = pElement->CountAttributes(); 482 for (int32_t i = 0; i < iAttributeCount; i++) { 483 CFX_WideString wsAttriName, wsAttriValue; 484 pElement->GetAttribute(i, wsAttriName, wsAttriValue); 485 if (wsAttriName == L"uuid") 486 pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue); 487 else if (wsAttriName == L"timeStamp") 488 pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue); 489 } 490 } 491 492 CFDE_XMLNode* pXMLConfigDOMRoot = nullptr; 493 CXFA_Node* pXFAConfigDOMRoot = nullptr; 494 { 495 for (CFDE_XMLNode* pChildItem = 496 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); 497 pChildItem; 498 pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) { 499 const XFA_PACKETINFO* pPacketInfo = 500 XFA_GetPacketByIndex(XFA_PACKET_Config); 501 if (!MatchNodeName(pChildItem, pPacketInfo->pName, pPacketInfo->pURI, 502 pPacketInfo->eFlags)) { 503 continue; 504 } 505 if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) { 506 return nullptr; 507 } 508 pXMLConfigDOMRoot = pChildItem; 509 pXFAConfigDOMRoot = 510 ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config); 511 pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr); 512 } 513 } 514 515 CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr; 516 CFDE_XMLNode* pXMLFormDOMRoot = nullptr; 517 CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr; 518 { 519 for (CFDE_XMLNode* pChildItem = 520 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); 521 pChildItem; 522 pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) { 523 if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element) 524 continue; 525 if (pChildItem == pXMLConfigDOMRoot) 526 continue; 527 528 CFDE_XMLElement* pElement = 529 reinterpret_cast<CFDE_XMLElement*>(pChildItem); 530 CFX_WideString wsPacketName; 531 pElement->GetLocalTagName(wsPacketName); 532 const XFA_PACKETINFO* pPacketInfo = 533 GetPacketByName(wsPacketName.AsStringC()); 534 if (pPacketInfo && pPacketInfo->pURI) { 535 if (!MatchNodeName(pElement, pPacketInfo->pName, pPacketInfo->pURI, 536 pPacketInfo->eFlags)) { 537 pPacketInfo = nullptr; 538 } 539 } 540 XFA_XDPPACKET ePacket = 541 pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER; 542 if (ePacket == XFA_XDPPACKET_XDP) 543 continue; 544 if (ePacket == XFA_XDPPACKET_Datasets) { 545 if (pXMLDatasetsDOMRoot) 546 return nullptr; 547 548 pXMLDatasetsDOMRoot = pElement; 549 } else if (ePacket == XFA_XDPPACKET_Form) { 550 if (pXMLFormDOMRoot) 551 return nullptr; 552 553 pXMLFormDOMRoot = pElement; 554 } else if (ePacket == XFA_XDPPACKET_Template) { 555 if (pXMLTemplateDOMRoot) { 556 // Found a duplicate template packet. 557 return nullptr; 558 } 559 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); 560 if (pPacketNode) { 561 pXMLTemplateDOMRoot = pElement; 562 pXFARootNode->InsertChild(pPacketNode); 563 } 564 } else { 565 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); 566 if (pPacketNode) { 567 if (pPacketInfo && 568 (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) && 569 pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) { 570 return nullptr; 571 } 572 pXFARootNode->InsertChild(pPacketNode); 573 } 574 } 575 } 576 } 577 578 if (!pXMLTemplateDOMRoot) { 579 // No template is found. 580 return nullptr; 581 } 582 if (pXMLDatasetsDOMRoot) { 583 CXFA_Node* pPacketNode = 584 ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets); 585 if (pPacketNode) 586 pXFARootNode->InsertChild(pPacketNode); 587 } 588 if (pXMLFormDOMRoot) { 589 CXFA_Node* pPacketNode = 590 ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form); 591 if (pPacketNode) 592 pXFARootNode->InsertChild(pPacketNode); 593 } 594 pXFARootNode->SetXMLMappingNode(pXMLDocumentNode); 595 return pXFARootNode; 596} 597 598CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config( 599 CFDE_XMLNode* pXMLDocumentNode, 600 XFA_XDPPACKET ePacketID) { 601 if (!MatchNodeName(pXMLDocumentNode, 602 XFA_GetPacketByIndex(XFA_PACKET_Config)->pName, 603 XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI, 604 XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) { 605 return nullptr; 606 } 607 CXFA_Node* pNode = 608 m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_Element::Config); 609 if (!pNode) 610 return nullptr; 611 612 pNode->SetCData(XFA_ATTRIBUTE_Name, 613 XFA_GetPacketByIndex(XFA_PACKET_Config)->pName); 614 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true)) 615 return nullptr; 616 617 pNode->SetXMLMappingNode(pXMLDocumentNode); 618 return pNode; 619} 620 621CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm( 622 CFDE_XMLNode* pXMLDocumentNode, 623 XFA_XDPPACKET ePacketID) { 624 CXFA_Node* pNode = nullptr; 625 if (ePacketID == XFA_XDPPACKET_Template) { 626 if (MatchNodeName(pXMLDocumentNode, 627 XFA_GetPacketByIndex(XFA_PACKET_Template)->pName, 628 XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI, 629 XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) { 630 pNode = 631 m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_Element::Template); 632 if (!pNode) 633 return nullptr; 634 635 pNode->SetCData(XFA_ATTRIBUTE_Name, 636 XFA_GetPacketByIndex(XFA_PACKET_Template)->pName); 637 if (m_bDocumentParser) { 638 CFX_WideString wsNamespaceURI; 639 CFDE_XMLElement* pXMLDocumentElement = 640 static_cast<CFDE_XMLElement*>(pXMLDocumentNode); 641 pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI); 642 if (wsNamespaceURI.IsEmpty()) 643 pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI); 644 645 pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI); 646 } 647 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true)) 648 return nullptr; 649 } 650 } else if (ePacketID == XFA_XDPPACKET_Form) { 651 if (MatchNodeName(pXMLDocumentNode, 652 XFA_GetPacketByIndex(XFA_PACKET_Form)->pName, 653 XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI, 654 XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) { 655 CFDE_XMLElement* pXMLDocumentElement = 656 static_cast<CFDE_XMLElement*>(pXMLDocumentNode); 657 CFX_WideString wsChecksum; 658 pXMLDocumentElement->GetString(L"checksum", wsChecksum); 659 if (wsChecksum.GetLength() != 28 || 660 m_pXMLParser->m_dwCheckStatus != 0x03) { 661 return nullptr; 662 } 663 std::unique_ptr<CXFA_ChecksumContext> pChecksum(new CXFA_ChecksumContext); 664 pChecksum->StartChecksum(); 665 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0], 666 m_pXMLParser->m_nSize[0]); 667 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1], 668 m_pXMLParser->m_nSize[1]); 669 pChecksum->FinishChecksum(); 670 CFX_ByteString bsCheck = pChecksum->GetChecksum(); 671 if (bsCheck != wsChecksum.UTF8Encode()) 672 return nullptr; 673 674 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_Element::Form); 675 if (!pNode) 676 return nullptr; 677 678 pNode->SetCData(XFA_ATTRIBUTE_Name, 679 XFA_GetPacketByIndex(XFA_PACKET_Form)->pName); 680 pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC()); 681 CXFA_Node* pTemplateRoot = 682 m_pRootNode->GetFirstChildByClass(XFA_Element::Template); 683 CXFA_Node* pTemplateChosen = 684 pTemplateRoot 685 ? pTemplateRoot->GetFirstChildByClass(XFA_Element::Subform) 686 : nullptr; 687 bool bUseAttribute = true; 688 if (pTemplateChosen && 689 pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) != 690 XFA_ATTRIBUTEENUM_Auto) { 691 bUseAttribute = false; 692 } 693 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute)) 694 return nullptr; 695 } 696 } 697 if (pNode) 698 pNode->SetXMLMappingNode(pXMLDocumentNode); 699 700 return pNode; 701} 702 703CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data( 704 CFDE_XMLNode* pXMLDocumentNode, 705 XFA_XDPPACKET ePacketID) { 706 CFDE_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode); 707 if (pDatasetsXMLNode) { 708 CXFA_Node* pNode = 709 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataModel); 710 if (!pNode) 711 return nullptr; 712 713 pNode->SetCData(XFA_ATTRIBUTE_Name, 714 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName); 715 if (!DataLoader(pNode, pDatasetsXMLNode, false)) 716 return nullptr; 717 718 pNode->SetXMLMappingNode(pDatasetsXMLNode); 719 return pNode; 720 } 721 722 CFDE_XMLNode* pDataXMLNode = nullptr; 723 if (MatchNodeName(pXMLDocumentNode, L"data", 724 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, 725 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { 726 static_cast<CFDE_XMLElement*>(pXMLDocumentNode) 727 ->RemoveAttribute(L"xmlns:xfa"); 728 pDataXMLNode = pXMLDocumentNode; 729 } else { 730 CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data"); 731 CFDE_XMLNode* pParentXMLNode = 732 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent); 733 if (pParentXMLNode) 734 pParentXMLNode->RemoveChildNode(pXMLDocumentNode); 735 736 ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element); 737 if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) { 738 static_cast<CFDE_XMLElement*>(pXMLDocumentNode) 739 ->RemoveAttribute(L"xmlns:xfa"); 740 } 741 pDataElement->InsertChildNode(pXMLDocumentNode); 742 pDataXMLNode = pDataElement; 743 } 744 745 if (pDataXMLNode) { 746 CXFA_Node* pNode = 747 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataGroup); 748 if (!pNode) { 749 if (pDataXMLNode != pXMLDocumentNode) 750 delete pDataXMLNode; 751 return nullptr; 752 } 753 CFX_WideString wsLocalName; 754 static_cast<CFDE_XMLElement*>(pDataXMLNode)->GetLocalTagName(wsLocalName); 755 pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName); 756 if (!DataLoader(pNode, pDataXMLNode, true)) 757 return nullptr; 758 759 pNode->SetXMLMappingNode(pDataXMLNode); 760 if (pDataXMLNode != pXMLDocumentNode) 761 pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false); 762 return pNode; 763 } 764 return nullptr; 765} 766 767CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet( 768 CFDE_XMLNode* pXMLDocumentNode, 769 XFA_XDPPACKET ePacketID) { 770 CXFA_Node* pNode = nullptr; 771 if (ePacketID == XFA_XDPPACKET_LocaleSet) { 772 if (MatchNodeName(pXMLDocumentNode, 773 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName, 774 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI, 775 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) { 776 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet, 777 XFA_Element::LocaleSet); 778 if (!pNode) 779 return nullptr; 780 781 pNode->SetCData(XFA_ATTRIBUTE_Name, 782 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName); 783 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true)) 784 return nullptr; 785 } 786 } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) { 787 if (MatchNodeName(pXMLDocumentNode, 788 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName, 789 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI, 790 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) { 791 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet, 792 XFA_Element::ConnectionSet); 793 if (!pNode) 794 return nullptr; 795 796 pNode->SetCData(XFA_ATTRIBUTE_Name, 797 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName); 798 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true)) 799 return nullptr; 800 } 801 } else if (ePacketID == XFA_XDPPACKET_SourceSet) { 802 if (MatchNodeName(pXMLDocumentNode, 803 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName, 804 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI, 805 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) { 806 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet, 807 XFA_Element::SourceSet); 808 if (!pNode) 809 return nullptr; 810 811 pNode->SetCData(XFA_ATTRIBUTE_Name, 812 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName); 813 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true)) 814 return nullptr; 815 } 816 } 817 if (pNode) 818 pNode->SetXMLMappingNode(pXMLDocumentNode); 819 return pNode; 820} 821 822CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc( 823 CFDE_XMLNode* pXMLDocumentNode, 824 XFA_XDPPACKET ePacketID) { 825 if (!MatchNodeName(pXMLDocumentNode, 826 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName, 827 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI, 828 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags)) 829 return nullptr; 830 831 CXFA_Node* pNode = 832 m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_Element::Xdc); 833 if (!pNode) 834 return nullptr; 835 836 pNode->SetCData(XFA_ATTRIBUTE_Name, 837 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName); 838 pNode->SetXMLMappingNode(pXMLDocumentNode); 839 return pNode; 840} 841 842CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User( 843 CFDE_XMLNode* pXMLDocumentNode, 844 XFA_XDPPACKET ePacketID) { 845 CXFA_Node* pNode = 846 m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Packet); 847 if (!pNode) 848 return nullptr; 849 850 CFX_WideString wsName; 851 static_cast<CFDE_XMLElement*>(pXMLDocumentNode)->GetLocalTagName(wsName); 852 pNode->SetCData(XFA_ATTRIBUTE_Name, wsName); 853 if (!UserPacketLoader(pNode, pXMLDocumentNode)) 854 return nullptr; 855 856 pNode->SetXMLMappingNode(pXMLDocumentNode); 857 return pNode; 858} 859 860CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode, 861 CFDE_XMLNode* pXMLDoc) { 862 return pXFANode; 863} 864 865CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode, 866 CFDE_XMLNode* pXMLDoc, 867 bool bDoTransform) { 868 ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets); 869 return pXFANode; 870} 871 872CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode, 873 CFDE_XMLNode* pXMLDoc, 874 XFA_XDPPACKET ePacketID, 875 bool bUseAttribute) { 876 bool bOneOfPropertyFound = false; 877 for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild); 878 pXMLChild; 879 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { 880 switch (pXMLChild->GetType()) { 881 case FDE_XMLNODE_Element: { 882 CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild); 883 CFX_WideString wsTagName; 884 pXMLElement->GetLocalTagName(wsTagName); 885 XFA_Element eType = XFA_GetElementTypeForName(wsTagName.AsStringC()); 886 if (eType == XFA_Element::Unknown) 887 continue; 888 889 const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement( 890 pXFANode->GetElementType(), eType, ePacketID); 891 if (pPropertyInfo && 892 ((pPropertyInfo->uFlags & 893 (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) { 894 if (bOneOfPropertyFound) 895 break; 896 897 bOneOfPropertyFound = true; 898 } 899 CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType); 900 if (!pXFAChild) 901 return nullptr; 902 if (ePacketID == XFA_XDPPACKET_Config) 903 pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC()); 904 905 bool IsNeedValue = true; 906 for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count; 907 i++) { 908 CFX_WideString wsAttrQualifiedName; 909 CFX_WideString wsAttrName; 910 CFX_WideString wsAttrValue; 911 pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue); 912 GetAttributeLocalName(wsAttrQualifiedName.AsStringC(), wsAttrName); 913 if (wsAttrName == L"nil" && wsAttrValue == L"true") { 914 IsNeedValue = false; 915 } 916 const XFA_ATTRIBUTEINFO* lpAttrInfo = 917 XFA_GetAttributeByName(wsAttrName.AsStringC()); 918 if (!lpAttrInfo) 919 continue; 920 921 if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name && 922 lpAttrInfo->eName != XFA_ATTRIBUTE_Save) { 923 continue; 924 } 925 pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC()); 926 } 927 pXFANode->InsertChild(pXFAChild); 928 if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) { 929 if (ePacketID == XFA_XDPPACKET_Config) 930 ParseContentNode(pXFAChild, pXMLElement, ePacketID); 931 else 932 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); 933 934 break; 935 } 936 switch (pXFAChild->GetObjectType()) { 937 case XFA_ObjectType::ContentNode: 938 case XFA_ObjectType::TextNode: 939 case XFA_ObjectType::NodeC: 940 case XFA_ObjectType::NodeV: 941 if (IsNeedValue) 942 ParseContentNode(pXFAChild, pXMLElement, ePacketID); 943 break; 944 default: 945 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); 946 break; 947 } 948 } break; 949 case FDE_XMLNODE_Instruction: 950 ParseInstruction(pXFANode, static_cast<CFDE_XMLInstruction*>(pXMLChild), 951 ePacketID); 952 break; 953 default: 954 break; 955 } 956 } 957 return pXFANode; 958} 959 960void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode, 961 CFDE_XMLNode* pXMLNode, 962 XFA_XDPPACKET ePacketID) { 963 XFA_Element element = XFA_Element::Sharptext; 964 if (pXFANode->GetElementType() == XFA_Element::ExData) { 965 CFX_WideStringC wsContentType = 966 pXFANode->GetCData(XFA_ATTRIBUTE_ContentType); 967 if (wsContentType == L"text/html") 968 element = XFA_Element::SharpxHTML; 969 else if (wsContentType == L"text/xml") 970 element = XFA_Element::Sharpxml; 971 } 972 if (element == XFA_Element::SharpxHTML) 973 pXFANode->SetXMLMappingNode(pXMLNode); 974 975 CFX_WideString wsValue; 976 for (CFDE_XMLNode* pXMLChild = 977 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); 978 pXMLChild; 979 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { 980 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); 981 if (eNodeType == FDE_XMLNODE_Instruction) 982 continue; 983 984 if (element == XFA_Element::SharpxHTML) { 985 if (eNodeType != FDE_XMLNODE_Element) 986 break; 987 988 if (XFA_RecognizeRichText(static_cast<CFDE_XMLElement*>(pXMLChild))) 989 XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild), 990 wsValue); 991 } else if (element == XFA_Element::Sharpxml) { 992 if (eNodeType != FDE_XMLNODE_Element) 993 break; 994 995 ConvertXMLToPlainText(static_cast<CFDE_XMLElement*>(pXMLChild), wsValue); 996 } else { 997 if (eNodeType == FDE_XMLNODE_Element) 998 break; 999 if (eNodeType == FDE_XMLNODE_Text) 1000 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsValue); 1001 else if (eNodeType == FDE_XMLNODE_CharData) 1002 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsValue); 1003 } 1004 break; 1005 } 1006 if (!wsValue.IsEmpty()) { 1007 if (pXFANode->IsContentNode()) { 1008 CXFA_Node* pContentRawDataNode = 1009 m_pFactory->CreateNode(ePacketID, element); 1010 ASSERT(pContentRawDataNode); 1011 pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue); 1012 pXFANode->InsertChild(pContentRawDataNode); 1013 } else { 1014 pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue); 1015 } 1016 } 1017} 1018 1019void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode, 1020 CFDE_XMLNode* pXMLNode, 1021 XFA_XDPPACKET ePacketID) { 1022 for (CFDE_XMLNode* pXMLChild = 1023 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); 1024 pXMLChild; 1025 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { 1026 switch (pXMLChild->GetType()) { 1027 case FDE_XMLNODE_Element: { 1028 CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild); 1029 { 1030 CFX_WideString wsNamespaceURI; 1031 GetElementTagNamespaceURI(pXMLElement, wsNamespaceURI); 1032 if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" || 1033 wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" || 1034 wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") { 1035 continue; 1036 } 1037 } 1038 1039 XFA_Element eNodeType = XFA_Element::DataModel; 1040 if (eNodeType == XFA_Element::DataModel) { 1041 CFX_WideString wsDataNodeAttr; 1042 if (FindAttributeWithNS(pXMLElement, L"dataNode", 1043 L"http://www.xfa.org/schema/xfa-data/1.0/", 1044 wsDataNodeAttr)) { 1045 if (wsDataNodeAttr == L"dataGroup") 1046 eNodeType = XFA_Element::DataGroup; 1047 else if (wsDataNodeAttr == L"dataValue") 1048 eNodeType = XFA_Element::DataValue; 1049 } 1050 } 1051 CFX_WideString wsContentType; 1052 if (eNodeType == XFA_Element::DataModel) { 1053 if (FindAttributeWithNS(pXMLElement, L"contentType", 1054 L"http://www.xfa.org/schema/xfa-data/1.0/", 1055 wsContentType)) { 1056 if (!wsContentType.IsEmpty()) 1057 eNodeType = XFA_Element::DataValue; 1058 } 1059 } 1060 if (eNodeType == XFA_Element::DataModel) { 1061 for (CFDE_XMLNode* pXMLDataChild = 1062 pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild); 1063 pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem( 1064 CFDE_XMLNode::NextSibling)) { 1065 if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) { 1066 if (!XFA_RecognizeRichText( 1067 static_cast<CFDE_XMLElement*>(pXMLDataChild))) { 1068 eNodeType = XFA_Element::DataGroup; 1069 break; 1070 } 1071 } 1072 } 1073 } 1074 if (eNodeType == XFA_Element::DataModel) 1075 eNodeType = XFA_Element::DataValue; 1076 1077 CXFA_Node* pXFAChild = 1078 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType); 1079 if (!pXFAChild) 1080 return; 1081 1082 CFX_WideString wsNodeName; 1083 pXMLElement->GetLocalTagName(wsNodeName); 1084 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName); 1085 bool bNeedValue = true; 1086 for (int32_t i = 0; i < pXMLElement->CountAttributes(); ++i) { 1087 CFX_WideString wsQualifiedName; 1088 CFX_WideString wsValue; 1089 CFX_WideString wsName; 1090 CFX_WideString wsNS; 1091 pXMLElement->GetAttribute(i, wsQualifiedName, wsValue); 1092 if (!ResolveAttribute(pXMLElement, wsQualifiedName.AsStringC(), 1093 wsName, wsNS)) { 1094 continue; 1095 } 1096 if (wsName == L"nil" && wsValue == L"true") { 1097 bNeedValue = false; 1098 continue; 1099 } 1100 if (wsNS == L"http://www.xfa.com/schema/xfa-package/" || 1101 wsNS == L"http://www.xfa.org/schema/xfa-package/" || 1102 wsNS == L"http://www.w3.org/2001/XMLSchema-instance" || 1103 wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") { 1104 continue; 1105 } 1106 CXFA_Node* pXFAMetaData = m_pFactory->CreateNode( 1107 XFA_XDPPACKET_Datasets, XFA_Element::DataValue); 1108 if (!pXFAMetaData) 1109 return; 1110 1111 pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsName); 1112 pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsQualifiedName); 1113 pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsValue); 1114 pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains, 1115 XFA_ATTRIBUTEENUM_MetaData); 1116 pXFAChild->InsertChild(pXFAMetaData); 1117 pXFAMetaData->SetXMLMappingNode(pXMLElement); 1118 pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false); 1119 } 1120 1121 if (!bNeedValue) { 1122 CFX_WideString wsNilName(L"xsi:nil"); 1123 pXMLElement->RemoveAttribute(wsNilName.c_str()); 1124 } 1125 pXFANode->InsertChild(pXFAChild); 1126 if (eNodeType == XFA_Element::DataGroup) 1127 ParseDataGroup(pXFAChild, pXMLElement, ePacketID); 1128 else if (bNeedValue) 1129 ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets); 1130 1131 pXFAChild->SetXMLMappingNode(pXMLElement); 1132 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1133 continue; 1134 } 1135 case FDE_XMLNODE_CharData: { 1136 CFDE_XMLCharData* pXMLCharData = 1137 static_cast<CFDE_XMLCharData*>(pXMLChild); 1138 CFX_WideString wsCharData; 1139 pXMLCharData->GetCharData(wsCharData); 1140 if (IsStringAllWhitespace(wsCharData)) 1141 continue; 1142 1143 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, 1144 XFA_Element::DataValue); 1145 if (!pXFAChild) 1146 return; 1147 1148 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData); 1149 pXFANode->InsertChild(pXFAChild); 1150 pXFAChild->SetXMLMappingNode(pXMLCharData); 1151 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1152 continue; 1153 } 1154 case FDE_XMLNODE_Text: { 1155 CFDE_XMLText* pXMLText = static_cast<CFDE_XMLText*>(pXMLChild); 1156 CFX_WideString wsText; 1157 pXMLText->GetText(wsText); 1158 if (IsStringAllWhitespace(wsText)) 1159 continue; 1160 1161 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, 1162 XFA_Element::DataValue); 1163 if (!pXFAChild) 1164 return; 1165 1166 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText); 1167 pXFANode->InsertChild(pXFAChild); 1168 pXFAChild->SetXMLMappingNode(pXMLText); 1169 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1170 continue; 1171 } 1172 default: 1173 continue; 1174 } 1175 } 1176} 1177 1178void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode, 1179 CFDE_XMLNode* pXMLNode, 1180 XFA_XDPPACKET ePacketID) { 1181 CFX_WideTextBuf wsValueTextBuf; 1182 CFX_WideTextBuf wsCurValueTextBuf; 1183 bool bMarkAsCompound = false; 1184 CFDE_XMLNode* pXMLCurValueNode = nullptr; 1185 for (CFDE_XMLNode* pXMLChild = 1186 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); 1187 pXMLChild; 1188 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { 1189 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); 1190 if (eNodeType == FDE_XMLNODE_Instruction) 1191 continue; 1192 1193 CFX_WideString wsText; 1194 if (eNodeType == FDE_XMLNODE_Text) { 1195 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText); 1196 if (!pXMLCurValueNode) 1197 pXMLCurValueNode = pXMLChild; 1198 1199 wsCurValueTextBuf << wsText; 1200 } else if (eNodeType == FDE_XMLNODE_CharData) { 1201 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsText); 1202 if (!pXMLCurValueNode) 1203 pXMLCurValueNode = pXMLChild; 1204 1205 wsCurValueTextBuf << wsText; 1206 } else if (XFA_RecognizeRichText( 1207 static_cast<CFDE_XMLElement*>(pXMLChild))) { 1208 XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild), 1209 wsText); 1210 if (!pXMLCurValueNode) 1211 pXMLCurValueNode = pXMLChild; 1212 1213 wsCurValueTextBuf << wsText; 1214 } else { 1215 bMarkAsCompound = true; 1216 if (pXMLCurValueNode) { 1217 CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString(); 1218 if (!wsCurValue.IsEmpty()) { 1219 CXFA_Node* pXFAChild = 1220 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); 1221 if (!pXFAChild) 1222 return; 1223 1224 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L""); 1225 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue); 1226 pXFANode->InsertChild(pXFAChild); 1227 pXFAChild->SetXMLMappingNode(pXMLCurValueNode); 1228 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1229 wsValueTextBuf << wsCurValue; 1230 wsCurValueTextBuf.Clear(); 1231 } 1232 pXMLCurValueNode = nullptr; 1233 } 1234 CXFA_Node* pXFAChild = 1235 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); 1236 if (!pXFAChild) 1237 return; 1238 1239 CFX_WideString wsNodeStr; 1240 static_cast<CFDE_XMLElement*>(pXMLChild)->GetLocalTagName(wsNodeStr); 1241 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr); 1242 ParseDataValue(pXFAChild, pXMLChild, ePacketID); 1243 pXFANode->InsertChild(pXFAChild); 1244 pXFAChild->SetXMLMappingNode(pXMLChild); 1245 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1246 CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value); 1247 wsValueTextBuf << wsCurValue; 1248 } 1249 } 1250 if (pXMLCurValueNode) { 1251 CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString(); 1252 if (!wsCurValue.IsEmpty()) { 1253 if (bMarkAsCompound) { 1254 CXFA_Node* pXFAChild = 1255 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); 1256 if (!pXFAChild) 1257 return; 1258 1259 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L""); 1260 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue); 1261 pXFANode->InsertChild(pXFAChild); 1262 pXFAChild->SetXMLMappingNode(pXMLCurValueNode); 1263 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1264 } 1265 wsValueTextBuf << wsCurValue; 1266 wsCurValueTextBuf.Clear(); 1267 } 1268 pXMLCurValueNode = nullptr; 1269 } 1270 CFX_WideString wsNodeValue = wsValueTextBuf.MakeString(); 1271 pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue); 1272} 1273 1274void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode, 1275 CFDE_XMLInstruction* pXMLInstruction, 1276 XFA_XDPPACKET ePacketID) { 1277 if (!m_bDocumentParser) 1278 return; 1279 1280 CFX_WideString wsTargetName; 1281 pXMLInstruction->GetTargetName(wsTargetName); 1282 if (wsTargetName == L"originalXFAVersion") { 1283 CFX_WideString wsData; 1284 if (pXMLInstruction->GetData(0, wsData) && 1285 (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) != 1286 XFA_VERSION_UNKNOWN)) { 1287 wsData.clear(); 1288 if (pXMLInstruction->GetData(1, wsData) && 1289 wsData == L"v2.7-scripting:1") { 1290 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true); 1291 } 1292 } 1293 } else if (wsTargetName == L"acrobat") { 1294 CFX_WideString wsData; 1295 if (pXMLInstruction->GetData(0, wsData) && wsData == L"JavaScript") { 1296 if (pXMLInstruction->GetData(1, wsData) && wsData == L"strictScoping") { 1297 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true); 1298 } 1299 } 1300 } 1301} 1302 1303void CXFA_SimpleParser::CloseParser() { 1304 m_pXMLDoc.reset(); 1305 m_pStream.Reset(); 1306} 1307