cxfa_simple_parser.cpp revision 5ae9d0c6fd838a2967cca72aa5751b51dadc2769
1// Copyright 2016 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "xfa/fxfa/parser/cxfa_simple_parser.h"
8
9#include <utility>
10
11#include "core/fxcrt/fx_ext.h"
12#include "third_party/base/ptr_util.h"
13#include "xfa/fgas/crt/fgas_codepage.h"
14#include "xfa/fxfa/fxfa.h"
15#include "xfa/fxfa/parser/cxfa_document.h"
16#include "xfa/fxfa/parser/cxfa_widetextread.h"
17#include "xfa/fxfa/parser/cxfa_xml_parser.h"
18#include "xfa/fxfa/parser/xfa_basic_data.h"
19#include "xfa/fxfa/parser/xfa_utils.h"
20#include "xfa/fxfa/xfa_checksum.h"
21
22namespace {
23
24CFDE_XMLNode* GetDocumentNode(CFDE_XMLDoc* pXMLDoc,
25                              bool bVerifyWellFormness = false) {
26  if (!pXMLDoc)
27    return nullptr;
28
29  for (CFDE_XMLNode* pXMLNode =
30           pXMLDoc->GetRoot()->GetNodeItem(CFDE_XMLNode::FirstChild);
31       pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
32    if (pXMLNode->GetType() != FDE_XMLNODE_Element)
33      continue;
34
35    if (!bVerifyWellFormness)
36      return pXMLNode;
37
38    for (CFDE_XMLNode* pNextNode =
39             pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling);
40         pNextNode;
41         pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
42      if (pNextNode->GetType() == FDE_XMLNODE_Element)
43        return nullptr;
44    }
45    return pXMLNode;
46  }
47  return nullptr;
48}
49
50void GetElementTagNamespaceURI(CFDE_XMLElement* pElement,
51                               CFX_WideString& wsNamespaceURI) {
52  CFX_WideString wsNodeStr;
53  pElement->GetNamespacePrefix(wsNodeStr);
54  if (!XFA_FDEExtension_ResolveNamespaceQualifier(
55          pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) {
56    wsNamespaceURI.clear();
57  }
58}
59
60bool MatchNodeName(CFDE_XMLNode* pNode,
61                   const CFX_WideStringC& wsLocalTagName,
62                   const CFX_WideStringC& wsNamespaceURIPrefix,
63                   uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
64  if (!pNode || pNode->GetType() != FDE_XMLNODE_Element)
65    return false;
66
67  CFDE_XMLElement* pElement = reinterpret_cast<CFDE_XMLElement*>(pNode);
68  CFX_WideString wsNodeStr;
69  pElement->GetLocalTagName(wsNodeStr);
70  if (wsNodeStr != wsLocalTagName)
71    return false;
72
73  GetElementTagNamespaceURI(pElement, wsNodeStr);
74  if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
75    return true;
76  if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
77    return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
78           wsNamespaceURIPrefix;
79  }
80  return wsNodeStr == wsNamespaceURIPrefix;
81}
82
83bool GetAttributeLocalName(const CFX_WideStringC& wsAttributeName,
84                           CFX_WideString& wsLocalAttrName) {
85  CFX_WideString wsAttrName(wsAttributeName);
86  FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
87  if (iFind < 0) {
88    wsLocalAttrName = wsAttrName;
89    return false;
90  }
91  wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1);
92  return true;
93}
94
95bool ResolveAttribute(CFDE_XMLElement* pElement,
96                      const CFX_WideStringC& wsAttributeName,
97                      CFX_WideString& wsLocalAttrName,
98                      CFX_WideString& wsNamespaceURI) {
99  CFX_WideString wsAttrName(wsAttributeName);
100  CFX_WideString wsNSPrefix;
101  if (GetAttributeLocalName(wsAttributeName, wsLocalAttrName)) {
102    wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() -
103                                 wsLocalAttrName.GetLength() - 1);
104  }
105  if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" ||
106      wsNSPrefix == L"xml") {
107    return false;
108  }
109  if (!XFA_FDEExtension_ResolveNamespaceQualifier(
110          pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) {
111    wsNamespaceURI.clear();
112    return false;
113  }
114  return true;
115}
116
117bool FindAttributeWithNS(CFDE_XMLElement* pElement,
118                         const CFX_WideStringC& wsLocalAttributeName,
119                         const CFX_WideStringC& wsNamespaceURIPrefix,
120                         CFX_WideString& wsValue,
121                         bool bMatchNSAsPrefix = false) {
122  if (!pElement)
123    return false;
124
125  CFX_WideString wsAttrName;
126  CFX_WideString wsAttrValue;
127  CFX_WideString wsAttrNS;
128  for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount;
129       i++) {
130    pElement->GetAttribute(i, wsAttrName, wsAttrValue);
131    FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
132    CFX_WideString wsNSPrefix;
133    if (iFind < 0) {
134      if (wsLocalAttributeName != wsAttrName)
135        continue;
136    } else {
137      if (wsLocalAttributeName !=
138          wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) {
139        continue;
140      }
141      wsNSPrefix = wsAttrName.Left(iFind);
142    }
143    if (!XFA_FDEExtension_ResolveNamespaceQualifier(
144            pElement, wsNSPrefix.AsStringC(), wsAttrNS)) {
145      continue;
146    }
147    if (bMatchNSAsPrefix) {
148      if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
149          wsNamespaceURIPrefix) {
150        continue;
151      }
152    } else {
153      if (wsAttrNS != wsNamespaceURIPrefix)
154        continue;
155    }
156    wsValue = wsAttrValue;
157    return true;
158  }
159  return false;
160}
161
162CFDE_XMLNode* GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) {
163  if (MatchNodeName(pXMLDocumentNode,
164                    XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
165                    XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
166                    XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
167    return pXMLDocumentNode;
168  }
169  if (!MatchNodeName(pXMLDocumentNode,
170                     XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
171                     XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
172                     XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
173    return nullptr;
174  }
175  for (CFDE_XMLNode* pDatasetsNode =
176           pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
177       pDatasetsNode;
178       pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
179    if (!MatchNodeName(pDatasetsNode,
180                       XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
181                       XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
182                       XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
183      continue;
184    }
185    return pDatasetsNode;
186  }
187  return nullptr;
188}
189
190bool IsStringAllWhitespace(CFX_WideString wsText) {
191  wsText.TrimRight(L"\x20\x9\xD\xA");
192  return wsText.IsEmpty();
193}
194
195void ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode,
196                           CFX_WideString& wsOutput) {
197  for (CFDE_XMLNode* pXMLChild =
198           pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
199       pXMLChild;
200       pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
201    switch (pXMLChild->GetType()) {
202      case FDE_XMLNODE_Element: {
203        CFX_WideString wsTextData;
204        static_cast<CFDE_XMLElement*>(pXMLChild)->GetTextData(wsTextData);
205        wsTextData += L"\n";
206        wsOutput += wsTextData;
207        break;
208      }
209      case FDE_XMLNODE_Text: {
210        CFX_WideString wsText;
211        static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
212        if (IsStringAllWhitespace(wsText))
213          continue;
214
215        wsOutput = wsText;
216        break;
217      }
218      case FDE_XMLNODE_CharData: {
219        CFX_WideString wsCharData;
220        static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsCharData);
221        if (IsStringAllWhitespace(wsCharData))
222          continue;
223
224        wsOutput = wsCharData;
225        break;
226      }
227      default:
228        ASSERT(false);
229        break;
230    }
231  }
232}
233
234const XFA_PACKETINFO* GetPacketByName(const CFX_WideStringC& wsName) {
235  if (wsName.IsEmpty())
236    return nullptr;
237
238  uint32_t uHash = FX_HashCode_GetW(wsName, false);
239  int32_t iStart = 0;
240  int32_t iEnd = g_iXFAPacketCount - 1;
241  do {
242    int32_t iMid = (iStart + iEnd) / 2;
243    const XFA_PACKETINFO* pInfo = g_XFAPacketData + iMid;
244    if (uHash == pInfo->uHash)
245      return pInfo;
246    if (uHash < pInfo->uHash)
247      iEnd = iMid - 1;
248    else
249      iStart = iMid + 1;
250  } while (iStart <= iEnd);
251  return nullptr;
252}
253
254}  // namespace
255
256bool XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) {
257  if (pRichTextXMLNode) {
258    CFX_WideString wsNamespaceURI;
259    GetElementTagNamespaceURI(pRichTextXMLNode, wsNamespaceURI);
260    if (wsNamespaceURI == L"http://www.w3.org/1999/xhtml")
261      return true;
262  }
263  return false;
264}
265
266CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory,
267                                     bool bDocumentParser)
268    : m_pXMLParser(nullptr),
269      m_pXMLDoc(nullptr),
270      m_pStream(nullptr),
271      m_pFileRead(nullptr),
272      m_pFactory(pFactory),
273      m_pRootNode(nullptr),
274      m_ePacketID(XFA_XDPPACKET_UNKNOWN),
275      m_bDocumentParser(bDocumentParser) {}
276
277CXFA_SimpleParser::~CXFA_SimpleParser() {}
278
279void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
280  m_pFactory = pFactory;
281}
282
283int32_t CXFA_SimpleParser::StartParse(
284    const CFX_RetainPtr<IFX_SeekableReadStream>& pStream,
285    XFA_XDPPACKET ePacketID) {
286  CloseParser();
287  m_pFileRead = pStream;
288  m_pStream = IFGAS_Stream::CreateStream(
289      pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text);
290  if (!m_pStream)
291    return XFA_PARSESTATUS_StreamErr;
292
293  uint16_t wCodePage = m_pStream->GetCodePage();
294  if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
295      wCodePage != FX_CODEPAGE_UTF8) {
296    m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
297  }
298  m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>();
299  auto pNewParser =
300      pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream);
301  m_pXMLParser = pNewParser.get();
302  if (!m_pXMLDoc->LoadXML(std::move(pNewParser)))
303    return XFA_PARSESTATUS_StatusErr;
304
305  m_ePacketID = ePacketID;
306  return XFA_PARSESTATUS_Ready;
307}
308
309int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) {
310  if (!m_pXMLDoc || m_ePacketID == XFA_XDPPACKET_UNKNOWN)
311    return XFA_PARSESTATUS_StatusErr;
312
313  int32_t iRet = m_pXMLDoc->DoLoad(pPause);
314  if (iRet < 0)
315    return XFA_PARSESTATUS_SyntaxErr;
316  if (iRet < 100)
317    return iRet / 2;
318
319  m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
320  m_pXMLDoc->CloseXML();
321  m_pStream.Reset();
322  if (!m_pRootNode)
323    return XFA_PARSESTATUS_StatusErr;
324
325  return XFA_PARSESTATUS_Done;
326}
327
328int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML,
329                                        CFDE_XMLNode*& pXMLNode,
330                                        IFX_Pause* pPause) {
331  CloseParser();
332  pXMLNode = nullptr;
333  m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>();
334  auto pStream = pdfium::MakeRetain<CXFA_WideTextRead>(wsXML);
335  auto pParser =
336      pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), pStream);
337  pParser->m_dwCheckStatus = 0x03;
338  if (!m_pXMLDoc->LoadXML(std::move(pParser)))
339    return XFA_PARSESTATUS_StatusErr;
340
341  int32_t iRet = m_pXMLDoc->DoLoad(pPause);
342  if (iRet < 0 || iRet >= 100)
343    m_pXMLDoc->CloseXML();
344  if (iRet < 0)
345    return XFA_PARSESTATUS_SyntaxErr;
346  if (iRet < 100)
347    return iRet / 2;
348
349  pXMLNode = GetDocumentNode(m_pXMLDoc.get());
350  return XFA_PARSESTATUS_Done;
351}
352
353void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
354                                         CFDE_XMLNode* pXMLNode) {
355  XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID();
356  if (ePacketID == XFA_XDPPACKET_Datasets) {
357    if (pXFANode->GetElementType() == XFA_Element::DataValue) {
358      for (CFDE_XMLNode* pXMLChild =
359               pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
360           pXMLChild;
361           pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
362        FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
363        if (eNodeType == FDE_XMLNODE_Instruction)
364          continue;
365
366        if (eNodeType == FDE_XMLNODE_Element) {
367          CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
368                                                        XFA_Element::DataValue);
369          if (!pXFAChild)
370            return;
371
372          CFX_WideString wsNodeStr;
373          CFDE_XMLElement* child = static_cast<CFDE_XMLElement*>(pXMLChild);
374          child->GetLocalTagName(wsNodeStr);
375          pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
376          CFX_WideString wsChildValue;
377          XFA_GetPlainTextFromRichText(child, wsChildValue);
378          if (!wsChildValue.IsEmpty())
379            pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue);
380
381          pXFANode->InsertChild(pXFAChild);
382          pXFAChild->SetXMLMappingNode(pXMLChild);
383          pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
384          break;
385        }
386      }
387      m_pRootNode = pXFANode;
388    } else {
389      m_pRootNode = DataLoader(pXFANode, pXMLNode, true);
390    }
391  } else if (pXFANode->IsContentNode()) {
392    ParseContentNode(pXFANode, pXMLNode, ePacketID);
393    m_pRootNode = pXFANode;
394  } else {
395    m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
396  }
397}
398
399CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
400  return m_pRootNode;
401}
402
403CFDE_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
404  return m_pXMLDoc.get();
405}
406
407bool XFA_FDEExtension_ResolveNamespaceQualifier(
408    CFDE_XMLElement* pNode,
409    const CFX_WideStringC& wsQualifier,
410    CFX_WideString& wsNamespaceURI) {
411  if (!pNode)
412    return false;
413
414  CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root);
415  CFX_WideString wsNSAttribute;
416  bool bRet = false;
417  if (wsQualifier.IsEmpty()) {
418    wsNSAttribute = L"xmlns";
419    bRet = true;
420  } else {
421    wsNSAttribute = L"xmlns:" + wsQualifier;
422  }
423  for (; pNode != pFakeRoot; pNode = static_cast<CFDE_XMLElement*>(
424                                 pNode->GetNodeItem(CFDE_XMLNode::Parent))) {
425    if (pNode->GetType() != FDE_XMLNODE_Element)
426      continue;
427
428    if (pNode->HasAttribute(wsNSAttribute.c_str())) {
429      pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI);
430      return true;
431    }
432  }
433  wsNamespaceURI.clear();
434  return bRet;
435}
436
437CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode,
438                                               XFA_XDPPACKET ePacketID) {
439  switch (ePacketID) {
440    case XFA_XDPPACKET_UNKNOWN:
441      return nullptr;
442    case XFA_XDPPACKET_XDP:
443      return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID);
444    case XFA_XDPPACKET_Config:
445      return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID);
446    case XFA_XDPPACKET_Template:
447    case XFA_XDPPACKET_Form:
448      return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID);
449    case XFA_XDPPACKET_Datasets:
450      return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID);
451    case XFA_XDPPACKET_Xdc:
452      return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID);
453    case XFA_XDPPACKET_LocaleSet:
454    case XFA_XDPPACKET_ConnectionSet:
455    case XFA_XDPPACKET_SourceSet:
456      return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode,
457                                                        ePacketID);
458    default:
459      return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID);
460  }
461}
462
463CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
464    CFDE_XMLNode* pXMLDocumentNode,
465    XFA_XDPPACKET ePacketID) {
466  if (!MatchNodeName(pXMLDocumentNode,
467                     XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
468                     XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
469                     XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
470    return nullptr;
471  }
472  CXFA_Node* pXFARootNode =
473      m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Xfa);
474  if (!pXFARootNode)
475    return nullptr;
476
477  m_pRootNode = pXFARootNode;
478  pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa");
479  {
480    CFDE_XMLElement* pElement = static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
481    int32_t iAttributeCount = pElement->CountAttributes();
482    for (int32_t i = 0; i < iAttributeCount; i++) {
483      CFX_WideString wsAttriName, wsAttriValue;
484      pElement->GetAttribute(i, wsAttriName, wsAttriValue);
485      if (wsAttriName == L"uuid")
486        pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue);
487      else if (wsAttriName == L"timeStamp")
488        pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue);
489    }
490  }
491
492  CFDE_XMLNode* pXMLConfigDOMRoot = nullptr;
493  CXFA_Node* pXFAConfigDOMRoot = nullptr;
494  {
495    for (CFDE_XMLNode* pChildItem =
496             pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
497         pChildItem;
498         pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
499      const XFA_PACKETINFO* pPacketInfo =
500          XFA_GetPacketByIndex(XFA_PACKET_Config);
501      if (!MatchNodeName(pChildItem, pPacketInfo->pName, pPacketInfo->pURI,
502                         pPacketInfo->eFlags)) {
503        continue;
504      }
505      if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
506        return nullptr;
507      }
508      pXMLConfigDOMRoot = pChildItem;
509      pXFAConfigDOMRoot =
510          ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config);
511      pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
512    }
513  }
514
515  CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr;
516  CFDE_XMLNode* pXMLFormDOMRoot = nullptr;
517  CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr;
518  {
519    for (CFDE_XMLNode* pChildItem =
520             pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
521         pChildItem;
522         pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
523      if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element)
524        continue;
525      if (pChildItem == pXMLConfigDOMRoot)
526        continue;
527
528      CFDE_XMLElement* pElement =
529          reinterpret_cast<CFDE_XMLElement*>(pChildItem);
530      CFX_WideString wsPacketName;
531      pElement->GetLocalTagName(wsPacketName);
532      const XFA_PACKETINFO* pPacketInfo =
533          GetPacketByName(wsPacketName.AsStringC());
534      if (pPacketInfo && pPacketInfo->pURI) {
535        if (!MatchNodeName(pElement, pPacketInfo->pName, pPacketInfo->pURI,
536                           pPacketInfo->eFlags)) {
537          pPacketInfo = nullptr;
538        }
539      }
540      XFA_XDPPACKET ePacket =
541          pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER;
542      if (ePacket == XFA_XDPPACKET_XDP)
543        continue;
544      if (ePacket == XFA_XDPPACKET_Datasets) {
545        if (pXMLDatasetsDOMRoot)
546          return nullptr;
547
548        pXMLDatasetsDOMRoot = pElement;
549      } else if (ePacket == XFA_XDPPACKET_Form) {
550        if (pXMLFormDOMRoot)
551          return nullptr;
552
553        pXMLFormDOMRoot = pElement;
554      } else if (ePacket == XFA_XDPPACKET_Template) {
555        if (pXMLTemplateDOMRoot) {
556          // Found a duplicate template packet.
557          return nullptr;
558        }
559        CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
560        if (pPacketNode) {
561          pXMLTemplateDOMRoot = pElement;
562          pXFARootNode->InsertChild(pPacketNode);
563        }
564      } else {
565        CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
566        if (pPacketNode) {
567          if (pPacketInfo &&
568              (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
569              pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
570            return nullptr;
571          }
572          pXFARootNode->InsertChild(pPacketNode);
573        }
574      }
575    }
576  }
577
578  if (!pXMLTemplateDOMRoot) {
579    // No template is found.
580    return nullptr;
581  }
582  if (pXMLDatasetsDOMRoot) {
583    CXFA_Node* pPacketNode =
584        ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets);
585    if (pPacketNode)
586      pXFARootNode->InsertChild(pPacketNode);
587  }
588  if (pXMLFormDOMRoot) {
589    CXFA_Node* pPacketNode =
590        ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form);
591    if (pPacketNode)
592      pXFARootNode->InsertChild(pPacketNode);
593  }
594  pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
595  return pXFARootNode;
596}
597
598CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
599    CFDE_XMLNode* pXMLDocumentNode,
600    XFA_XDPPACKET ePacketID) {
601  if (!MatchNodeName(pXMLDocumentNode,
602                     XFA_GetPacketByIndex(XFA_PACKET_Config)->pName,
603                     XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI,
604                     XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) {
605    return nullptr;
606  }
607  CXFA_Node* pNode =
608      m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_Element::Config);
609  if (!pNode)
610    return nullptr;
611
612  pNode->SetCData(XFA_ATTRIBUTE_Name,
613                  XFA_GetPacketByIndex(XFA_PACKET_Config)->pName);
614  if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
615    return nullptr;
616
617  pNode->SetXMLMappingNode(pXMLDocumentNode);
618  return pNode;
619}
620
621CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm(
622    CFDE_XMLNode* pXMLDocumentNode,
623    XFA_XDPPACKET ePacketID) {
624  CXFA_Node* pNode = nullptr;
625  if (ePacketID == XFA_XDPPACKET_Template) {
626    if (MatchNodeName(pXMLDocumentNode,
627                      XFA_GetPacketByIndex(XFA_PACKET_Template)->pName,
628                      XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI,
629                      XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) {
630      pNode =
631          m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_Element::Template);
632      if (!pNode)
633        return nullptr;
634
635      pNode->SetCData(XFA_ATTRIBUTE_Name,
636                      XFA_GetPacketByIndex(XFA_PACKET_Template)->pName);
637      if (m_bDocumentParser) {
638        CFX_WideString wsNamespaceURI;
639        CFDE_XMLElement* pXMLDocumentElement =
640            static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
641        pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI);
642        if (wsNamespaceURI.IsEmpty())
643          pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI);
644
645        pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
646      }
647      if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
648        return nullptr;
649    }
650  } else if (ePacketID == XFA_XDPPACKET_Form) {
651    if (MatchNodeName(pXMLDocumentNode,
652                      XFA_GetPacketByIndex(XFA_PACKET_Form)->pName,
653                      XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI,
654                      XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) {
655      CFDE_XMLElement* pXMLDocumentElement =
656          static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
657      CFX_WideString wsChecksum;
658      pXMLDocumentElement->GetString(L"checksum", wsChecksum);
659      if (wsChecksum.GetLength() != 28 ||
660          m_pXMLParser->m_dwCheckStatus != 0x03) {
661        return nullptr;
662      }
663      std::unique_ptr<CXFA_ChecksumContext> pChecksum(new CXFA_ChecksumContext);
664      pChecksum->StartChecksum();
665      pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
666                                m_pXMLParser->m_nSize[0]);
667      pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
668                                m_pXMLParser->m_nSize[1]);
669      pChecksum->FinishChecksum();
670      CFX_ByteString bsCheck = pChecksum->GetChecksum();
671      if (bsCheck != wsChecksum.UTF8Encode())
672        return nullptr;
673
674      pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_Element::Form);
675      if (!pNode)
676        return nullptr;
677
678      pNode->SetCData(XFA_ATTRIBUTE_Name,
679                      XFA_GetPacketByIndex(XFA_PACKET_Form)->pName);
680      pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC());
681      CXFA_Node* pTemplateRoot =
682          m_pRootNode->GetFirstChildByClass(XFA_Element::Template);
683      CXFA_Node* pTemplateChosen =
684          pTemplateRoot
685              ? pTemplateRoot->GetFirstChildByClass(XFA_Element::Subform)
686              : nullptr;
687      bool bUseAttribute = true;
688      if (pTemplateChosen &&
689          pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) !=
690              XFA_ATTRIBUTEENUM_Auto) {
691        bUseAttribute = false;
692      }
693      if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute))
694        return nullptr;
695    }
696  }
697  if (pNode)
698    pNode->SetXMLMappingNode(pXMLDocumentNode);
699
700  return pNode;
701}
702
703CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
704    CFDE_XMLNode* pXMLDocumentNode,
705    XFA_XDPPACKET ePacketID) {
706  CFDE_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
707  if (pDatasetsXMLNode) {
708    CXFA_Node* pNode =
709        m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataModel);
710    if (!pNode)
711      return nullptr;
712
713    pNode->SetCData(XFA_ATTRIBUTE_Name,
714                    XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName);
715    if (!DataLoader(pNode, pDatasetsXMLNode, false))
716      return nullptr;
717
718    pNode->SetXMLMappingNode(pDatasetsXMLNode);
719    return pNode;
720  }
721
722  CFDE_XMLNode* pDataXMLNode = nullptr;
723  if (MatchNodeName(pXMLDocumentNode, L"data",
724                    XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
725                    XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
726    static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
727        ->RemoveAttribute(L"xmlns:xfa");
728    pDataXMLNode = pXMLDocumentNode;
729  } else {
730    CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data");
731    CFDE_XMLNode* pParentXMLNode =
732        pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent);
733    if (pParentXMLNode)
734      pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
735
736    ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element);
737    if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) {
738      static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
739          ->RemoveAttribute(L"xmlns:xfa");
740    }
741    pDataElement->InsertChildNode(pXMLDocumentNode);
742    pDataXMLNode = pDataElement;
743  }
744
745  if (pDataXMLNode) {
746    CXFA_Node* pNode =
747        m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataGroup);
748    if (!pNode) {
749      if (pDataXMLNode != pXMLDocumentNode)
750        delete pDataXMLNode;
751      return nullptr;
752    }
753    CFX_WideString wsLocalName;
754    static_cast<CFDE_XMLElement*>(pDataXMLNode)->GetLocalTagName(wsLocalName);
755    pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName);
756    if (!DataLoader(pNode, pDataXMLNode, true))
757      return nullptr;
758
759    pNode->SetXMLMappingNode(pDataXMLNode);
760    if (pDataXMLNode != pXMLDocumentNode)
761      pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
762    return pNode;
763  }
764  return nullptr;
765}
766
767CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
768    CFDE_XMLNode* pXMLDocumentNode,
769    XFA_XDPPACKET ePacketID) {
770  CXFA_Node* pNode = nullptr;
771  if (ePacketID == XFA_XDPPACKET_LocaleSet) {
772    if (MatchNodeName(pXMLDocumentNode,
773                      XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName,
774                      XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI,
775                      XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) {
776      pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet,
777                                     XFA_Element::LocaleSet);
778      if (!pNode)
779        return nullptr;
780
781      pNode->SetCData(XFA_ATTRIBUTE_Name,
782                      XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName);
783      if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
784        return nullptr;
785    }
786  } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) {
787    if (MatchNodeName(pXMLDocumentNode,
788                      XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName,
789                      XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI,
790                      XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) {
791      pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet,
792                                     XFA_Element::ConnectionSet);
793      if (!pNode)
794        return nullptr;
795
796      pNode->SetCData(XFA_ATTRIBUTE_Name,
797                      XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName);
798      if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
799        return nullptr;
800    }
801  } else if (ePacketID == XFA_XDPPACKET_SourceSet) {
802    if (MatchNodeName(pXMLDocumentNode,
803                      XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName,
804                      XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI,
805                      XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) {
806      pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet,
807                                     XFA_Element::SourceSet);
808      if (!pNode)
809        return nullptr;
810
811      pNode->SetCData(XFA_ATTRIBUTE_Name,
812                      XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName);
813      if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
814        return nullptr;
815    }
816  }
817  if (pNode)
818    pNode->SetXMLMappingNode(pXMLDocumentNode);
819  return pNode;
820}
821
822CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
823    CFDE_XMLNode* pXMLDocumentNode,
824    XFA_XDPPACKET ePacketID) {
825  if (!MatchNodeName(pXMLDocumentNode,
826                     XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName,
827                     XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI,
828                     XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags))
829    return nullptr;
830
831  CXFA_Node* pNode =
832      m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_Element::Xdc);
833  if (!pNode)
834    return nullptr;
835
836  pNode->SetCData(XFA_ATTRIBUTE_Name,
837                  XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName);
838  pNode->SetXMLMappingNode(pXMLDocumentNode);
839  return pNode;
840}
841
842CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
843    CFDE_XMLNode* pXMLDocumentNode,
844    XFA_XDPPACKET ePacketID) {
845  CXFA_Node* pNode =
846      m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Packet);
847  if (!pNode)
848    return nullptr;
849
850  CFX_WideString wsName;
851  static_cast<CFDE_XMLElement*>(pXMLDocumentNode)->GetLocalTagName(wsName);
852  pNode->SetCData(XFA_ATTRIBUTE_Name, wsName);
853  if (!UserPacketLoader(pNode, pXMLDocumentNode))
854    return nullptr;
855
856  pNode->SetXMLMappingNode(pXMLDocumentNode);
857  return pNode;
858}
859
860CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
861                                               CFDE_XMLNode* pXMLDoc) {
862  return pXFANode;
863}
864
865CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
866                                         CFDE_XMLNode* pXMLDoc,
867                                         bool bDoTransform) {
868  ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets);
869  return pXFANode;
870}
871
872CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
873                                           CFDE_XMLNode* pXMLDoc,
874                                           XFA_XDPPACKET ePacketID,
875                                           bool bUseAttribute) {
876  bool bOneOfPropertyFound = false;
877  for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild);
878       pXMLChild;
879       pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
880    switch (pXMLChild->GetType()) {
881      case FDE_XMLNODE_Element: {
882        CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
883        CFX_WideString wsTagName;
884        pXMLElement->GetLocalTagName(wsTagName);
885        XFA_Element eType = XFA_GetElementTypeForName(wsTagName.AsStringC());
886        if (eType == XFA_Element::Unknown)
887          continue;
888
889        const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement(
890            pXFANode->GetElementType(), eType, ePacketID);
891        if (pPropertyInfo &&
892            ((pPropertyInfo->uFlags &
893              (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) {
894          if (bOneOfPropertyFound)
895            break;
896
897          bOneOfPropertyFound = true;
898        }
899        CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
900        if (!pXFAChild)
901          return nullptr;
902        if (ePacketID == XFA_XDPPACKET_Config)
903          pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC());
904
905        bool IsNeedValue = true;
906        for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count;
907             i++) {
908          CFX_WideString wsAttrQualifiedName;
909          CFX_WideString wsAttrName;
910          CFX_WideString wsAttrValue;
911          pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue);
912          GetAttributeLocalName(wsAttrQualifiedName.AsStringC(), wsAttrName);
913          if (wsAttrName == L"nil" && wsAttrValue == L"true") {
914            IsNeedValue = false;
915          }
916          const XFA_ATTRIBUTEINFO* lpAttrInfo =
917              XFA_GetAttributeByName(wsAttrName.AsStringC());
918          if (!lpAttrInfo)
919            continue;
920
921          if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name &&
922              lpAttrInfo->eName != XFA_ATTRIBUTE_Save) {
923            continue;
924          }
925          pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC());
926        }
927        pXFANode->InsertChild(pXFAChild);
928        if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
929          if (ePacketID == XFA_XDPPACKET_Config)
930            ParseContentNode(pXFAChild, pXMLElement, ePacketID);
931          else
932            NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
933
934          break;
935        }
936        switch (pXFAChild->GetObjectType()) {
937          case XFA_ObjectType::ContentNode:
938          case XFA_ObjectType::TextNode:
939          case XFA_ObjectType::NodeC:
940          case XFA_ObjectType::NodeV:
941            if (IsNeedValue)
942              ParseContentNode(pXFAChild, pXMLElement, ePacketID);
943            break;
944          default:
945            NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
946            break;
947        }
948      } break;
949      case FDE_XMLNODE_Instruction:
950        ParseInstruction(pXFANode, static_cast<CFDE_XMLInstruction*>(pXMLChild),
951                         ePacketID);
952        break;
953      default:
954        break;
955    }
956  }
957  return pXFANode;
958}
959
960void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
961                                         CFDE_XMLNode* pXMLNode,
962                                         XFA_XDPPACKET ePacketID) {
963  XFA_Element element = XFA_Element::Sharptext;
964  if (pXFANode->GetElementType() == XFA_Element::ExData) {
965    CFX_WideStringC wsContentType =
966        pXFANode->GetCData(XFA_ATTRIBUTE_ContentType);
967    if (wsContentType == L"text/html")
968      element = XFA_Element::SharpxHTML;
969    else if (wsContentType == L"text/xml")
970      element = XFA_Element::Sharpxml;
971  }
972  if (element == XFA_Element::SharpxHTML)
973    pXFANode->SetXMLMappingNode(pXMLNode);
974
975  CFX_WideString wsValue;
976  for (CFDE_XMLNode* pXMLChild =
977           pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
978       pXMLChild;
979       pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
980    FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
981    if (eNodeType == FDE_XMLNODE_Instruction)
982      continue;
983
984    if (element == XFA_Element::SharpxHTML) {
985      if (eNodeType != FDE_XMLNODE_Element)
986        break;
987
988      if (XFA_RecognizeRichText(static_cast<CFDE_XMLElement*>(pXMLChild)))
989        XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
990                                     wsValue);
991    } else if (element == XFA_Element::Sharpxml) {
992      if (eNodeType != FDE_XMLNODE_Element)
993        break;
994
995      ConvertXMLToPlainText(static_cast<CFDE_XMLElement*>(pXMLChild), wsValue);
996    } else {
997      if (eNodeType == FDE_XMLNODE_Element)
998        break;
999      if (eNodeType == FDE_XMLNODE_Text)
1000        static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsValue);
1001      else if (eNodeType == FDE_XMLNODE_CharData)
1002        static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsValue);
1003    }
1004    break;
1005  }
1006  if (!wsValue.IsEmpty()) {
1007    if (pXFANode->IsContentNode()) {
1008      CXFA_Node* pContentRawDataNode =
1009          m_pFactory->CreateNode(ePacketID, element);
1010      ASSERT(pContentRawDataNode);
1011      pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1012      pXFANode->InsertChild(pContentRawDataNode);
1013    } else {
1014      pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1015    }
1016  }
1017}
1018
1019void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
1020                                       CFDE_XMLNode* pXMLNode,
1021                                       XFA_XDPPACKET ePacketID) {
1022  for (CFDE_XMLNode* pXMLChild =
1023           pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
1024       pXMLChild;
1025       pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1026    switch (pXMLChild->GetType()) {
1027      case FDE_XMLNODE_Element: {
1028        CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
1029        {
1030          CFX_WideString wsNamespaceURI;
1031          GetElementTagNamespaceURI(pXMLElement, wsNamespaceURI);
1032          if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" ||
1033              wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" ||
1034              wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") {
1035            continue;
1036          }
1037        }
1038
1039        XFA_Element eNodeType = XFA_Element::DataModel;
1040        if (eNodeType == XFA_Element::DataModel) {
1041          CFX_WideString wsDataNodeAttr;
1042          if (FindAttributeWithNS(pXMLElement, L"dataNode",
1043                                  L"http://www.xfa.org/schema/xfa-data/1.0/",
1044                                  wsDataNodeAttr)) {
1045            if (wsDataNodeAttr == L"dataGroup")
1046              eNodeType = XFA_Element::DataGroup;
1047            else if (wsDataNodeAttr == L"dataValue")
1048              eNodeType = XFA_Element::DataValue;
1049          }
1050        }
1051        CFX_WideString wsContentType;
1052        if (eNodeType == XFA_Element::DataModel) {
1053          if (FindAttributeWithNS(pXMLElement, L"contentType",
1054                                  L"http://www.xfa.org/schema/xfa-data/1.0/",
1055                                  wsContentType)) {
1056            if (!wsContentType.IsEmpty())
1057              eNodeType = XFA_Element::DataValue;
1058          }
1059        }
1060        if (eNodeType == XFA_Element::DataModel) {
1061          for (CFDE_XMLNode* pXMLDataChild =
1062                   pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild);
1063               pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
1064                                  CFDE_XMLNode::NextSibling)) {
1065            if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) {
1066              if (!XFA_RecognizeRichText(
1067                      static_cast<CFDE_XMLElement*>(pXMLDataChild))) {
1068                eNodeType = XFA_Element::DataGroup;
1069                break;
1070              }
1071            }
1072          }
1073        }
1074        if (eNodeType == XFA_Element::DataModel)
1075          eNodeType = XFA_Element::DataValue;
1076
1077        CXFA_Node* pXFAChild =
1078            m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType);
1079        if (!pXFAChild)
1080          return;
1081
1082        CFX_WideString wsNodeName;
1083        pXMLElement->GetLocalTagName(wsNodeName);
1084        pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName);
1085        bool bNeedValue = true;
1086        for (int32_t i = 0; i < pXMLElement->CountAttributes(); ++i) {
1087          CFX_WideString wsQualifiedName;
1088          CFX_WideString wsValue;
1089          CFX_WideString wsName;
1090          CFX_WideString wsNS;
1091          pXMLElement->GetAttribute(i, wsQualifiedName, wsValue);
1092          if (!ResolveAttribute(pXMLElement, wsQualifiedName.AsStringC(),
1093                                wsName, wsNS)) {
1094            continue;
1095          }
1096          if (wsName == L"nil" && wsValue == L"true") {
1097            bNeedValue = false;
1098            continue;
1099          }
1100          if (wsNS == L"http://www.xfa.com/schema/xfa-package/" ||
1101              wsNS == L"http://www.xfa.org/schema/xfa-package/" ||
1102              wsNS == L"http://www.w3.org/2001/XMLSchema-instance" ||
1103              wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") {
1104            continue;
1105          }
1106          CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
1107              XFA_XDPPACKET_Datasets, XFA_Element::DataValue);
1108          if (!pXFAMetaData)
1109            return;
1110
1111          pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsName);
1112          pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsQualifiedName);
1113          pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1114          pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains,
1115                                XFA_ATTRIBUTEENUM_MetaData);
1116          pXFAChild->InsertChild(pXFAMetaData);
1117          pXFAMetaData->SetXMLMappingNode(pXMLElement);
1118          pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
1119        }
1120
1121        if (!bNeedValue) {
1122          CFX_WideString wsNilName(L"xsi:nil");
1123          pXMLElement->RemoveAttribute(wsNilName.c_str());
1124        }
1125        pXFANode->InsertChild(pXFAChild);
1126        if (eNodeType == XFA_Element::DataGroup)
1127          ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
1128        else if (bNeedValue)
1129          ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets);
1130
1131        pXFAChild->SetXMLMappingNode(pXMLElement);
1132        pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1133        continue;
1134      }
1135      case FDE_XMLNODE_CharData: {
1136        CFDE_XMLCharData* pXMLCharData =
1137            static_cast<CFDE_XMLCharData*>(pXMLChild);
1138        CFX_WideString wsCharData;
1139        pXMLCharData->GetCharData(wsCharData);
1140        if (IsStringAllWhitespace(wsCharData))
1141          continue;
1142
1143        CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1144                                                      XFA_Element::DataValue);
1145        if (!pXFAChild)
1146          return;
1147
1148        pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData);
1149        pXFANode->InsertChild(pXFAChild);
1150        pXFAChild->SetXMLMappingNode(pXMLCharData);
1151        pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1152        continue;
1153      }
1154      case FDE_XMLNODE_Text: {
1155        CFDE_XMLText* pXMLText = static_cast<CFDE_XMLText*>(pXMLChild);
1156        CFX_WideString wsText;
1157        pXMLText->GetText(wsText);
1158        if (IsStringAllWhitespace(wsText))
1159          continue;
1160
1161        CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1162                                                      XFA_Element::DataValue);
1163        if (!pXFAChild)
1164          return;
1165
1166        pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText);
1167        pXFANode->InsertChild(pXFAChild);
1168        pXFAChild->SetXMLMappingNode(pXMLText);
1169        pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1170        continue;
1171      }
1172      default:
1173        continue;
1174    }
1175  }
1176}
1177
1178void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
1179                                       CFDE_XMLNode* pXMLNode,
1180                                       XFA_XDPPACKET ePacketID) {
1181  CFX_WideTextBuf wsValueTextBuf;
1182  CFX_WideTextBuf wsCurValueTextBuf;
1183  bool bMarkAsCompound = false;
1184  CFDE_XMLNode* pXMLCurValueNode = nullptr;
1185  for (CFDE_XMLNode* pXMLChild =
1186           pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
1187       pXMLChild;
1188       pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1189    FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
1190    if (eNodeType == FDE_XMLNODE_Instruction)
1191      continue;
1192
1193    CFX_WideString wsText;
1194    if (eNodeType == FDE_XMLNODE_Text) {
1195      static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
1196      if (!pXMLCurValueNode)
1197        pXMLCurValueNode = pXMLChild;
1198
1199      wsCurValueTextBuf << wsText;
1200    } else if (eNodeType == FDE_XMLNODE_CharData) {
1201      static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsText);
1202      if (!pXMLCurValueNode)
1203        pXMLCurValueNode = pXMLChild;
1204
1205      wsCurValueTextBuf << wsText;
1206    } else if (XFA_RecognizeRichText(
1207                   static_cast<CFDE_XMLElement*>(pXMLChild))) {
1208      XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
1209                                   wsText);
1210      if (!pXMLCurValueNode)
1211        pXMLCurValueNode = pXMLChild;
1212
1213      wsCurValueTextBuf << wsText;
1214    } else {
1215      bMarkAsCompound = true;
1216      if (pXMLCurValueNode) {
1217        CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1218        if (!wsCurValue.IsEmpty()) {
1219          CXFA_Node* pXFAChild =
1220              m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1221          if (!pXFAChild)
1222            return;
1223
1224          pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1225          pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1226          pXFANode->InsertChild(pXFAChild);
1227          pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1228          pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1229          wsValueTextBuf << wsCurValue;
1230          wsCurValueTextBuf.Clear();
1231        }
1232        pXMLCurValueNode = nullptr;
1233      }
1234      CXFA_Node* pXFAChild =
1235          m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1236      if (!pXFAChild)
1237        return;
1238
1239      CFX_WideString wsNodeStr;
1240      static_cast<CFDE_XMLElement*>(pXMLChild)->GetLocalTagName(wsNodeStr);
1241      pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
1242      ParseDataValue(pXFAChild, pXMLChild, ePacketID);
1243      pXFANode->InsertChild(pXFAChild);
1244      pXFAChild->SetXMLMappingNode(pXMLChild);
1245      pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1246      CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value);
1247      wsValueTextBuf << wsCurValue;
1248    }
1249  }
1250  if (pXMLCurValueNode) {
1251    CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1252    if (!wsCurValue.IsEmpty()) {
1253      if (bMarkAsCompound) {
1254        CXFA_Node* pXFAChild =
1255            m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1256        if (!pXFAChild)
1257          return;
1258
1259        pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1260        pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1261        pXFANode->InsertChild(pXFAChild);
1262        pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1263        pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1264      }
1265      wsValueTextBuf << wsCurValue;
1266      wsCurValueTextBuf.Clear();
1267    }
1268    pXMLCurValueNode = nullptr;
1269  }
1270  CFX_WideString wsNodeValue = wsValueTextBuf.MakeString();
1271  pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue);
1272}
1273
1274void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
1275                                         CFDE_XMLInstruction* pXMLInstruction,
1276                                         XFA_XDPPACKET ePacketID) {
1277  if (!m_bDocumentParser)
1278    return;
1279
1280  CFX_WideString wsTargetName;
1281  pXMLInstruction->GetTargetName(wsTargetName);
1282  if (wsTargetName == L"originalXFAVersion") {
1283    CFX_WideString wsData;
1284    if (pXMLInstruction->GetData(0, wsData) &&
1285        (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) !=
1286         XFA_VERSION_UNKNOWN)) {
1287      wsData.clear();
1288      if (pXMLInstruction->GetData(1, wsData) &&
1289          wsData == L"v2.7-scripting:1") {
1290        pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true);
1291      }
1292    }
1293  } else if (wsTargetName == L"acrobat") {
1294    CFX_WideString wsData;
1295    if (pXMLInstruction->GetData(0, wsData) && wsData == L"JavaScript") {
1296      if (pXMLInstruction->GetData(1, wsData) && wsData == L"strictScoping") {
1297        pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true);
1298      }
1299    }
1300  }
1301}
1302
1303void CXFA_SimpleParser::CloseParser() {
1304  m_pXMLDoc.reset();
1305  m_pStream.Reset();
1306}
1307