// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
7e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfdoc/fpdf_doc.h"
8e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fxcrt/fx_xml.h"
9e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef struct _PDFDOC_METADATA {
10e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_Document *m_pDoc;
11e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CXML_Element *m_pXmlElmnt;
12e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CXML_Element *m_pElmntRdf;
13e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_CMapByteStringToPtr *m_pStringMap;
14e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} PDFDOC_METADATA, * PDFDOC_LPMETADATA;
15e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef PDFDOC_METADATA const * PDFDOC_LPCMETADATA;
16e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovconst FX_LPCSTR gs_FPDFDOC_Metadata_Titles[] = {
17e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "Title", "title",
18e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "Subject", "description",
19e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "Author", "creator",
20e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "Keywords", "Keywords",
21e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "Producer", "Producer",
22e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "Creator", "CreatorTool",
23e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "CreationDate", "CreateDate",
24e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "ModDate", "ModifyDate",
25e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    "MetadataDate", "MetadataDate"
26e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov};
27e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCPDF_Metadata::CPDF_Metadata()
28e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
29e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_pData = FX_Alloc(PDFDOC_METADATA, 1);
30e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_CMapByteStringToPtr *&pStringMap = ((PDFDOC_LPMETADATA)m_pData)->m_pStringMap;
31e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pStringMap = new CFX_CMapByteStringToPtr;
32e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_ByteString bstr;
33e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < 18; i += 2) {
34e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        bstr = gs_FPDFDOC_Metadata_Titles[i];
35e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pStringMap->AddValue(bstr, (void*)gs_FPDFDOC_Metadata_Titles[i + 1]);
36e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
37e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
38e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
39e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCPDF_Metadata::~CPDF_Metadata()
40e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
41e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FXSYS_assert(m_pData != NULL);
42e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CXML_Element *&p = ((PDFDOC_LPMETADATA)m_pData)->m_pXmlElmnt;
43e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    delete p;
44e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_CMapByteStringToPtr *pStringMap = ((PDFDOC_LPMETADATA)m_pData)->m_pStringMap;
45e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (pStringMap) {
46e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pStringMap->RemoveAll();
47e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        delete pStringMap;
48e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
49e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_Free(m_pData);
50e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
51e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CPDF_Metadata::LoadDoc(CPDF_Document *pDoc)
52e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
53e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FXSYS_assert(pDoc != NULL);
54e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    ((PDFDOC_LPMETADATA)m_pData)->m_pDoc = pDoc;
55e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_Dictionary *pRoot = pDoc->GetRoot();
56e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_Stream *pStream = pRoot->GetStream(FX_BSTRC("Metadata"));
57e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!pStream) {
58e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
59e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
60e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_StreamAcc acc;
61e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    acc.LoadAllData(pStream, FALSE);
62e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int size = acc.GetSize();
63e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_LPCBYTE pBuf = acc.GetData();
64e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CXML_Element *&pXmlElmnt = ((PDFDOC_LPMETADATA)m_pData)->m_pXmlElmnt;
65e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pXmlElmnt = CXML_Element::Parse(pBuf, size);
66e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!pXmlElmnt) {
67e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
68e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
69e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CXML_Element *&pElmntRdf = ((PDFDOC_LPMETADATA)m_pData)->m_pElmntRdf;
70e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (pXmlElmnt->GetTagName() == FX_BSTRC("RDF")) {
71e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pElmntRdf = pXmlElmnt;
72e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    } else {
73e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pElmntRdf = pXmlElmnt->GetElement(NULL, FX_BSTRC("RDF"));
74e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
75e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
76e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_INT32 CPDF_Metadata::GetString(FX_BSTR bsItem, CFX_WideString &wsStr)
77e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
78e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!((PDFDOC_LPMETADATA)m_pData)->m_pXmlElmnt) {
79e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return -1;
80e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
81e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!((PDFDOC_LPMETADATA)m_pData)->m_pStringMap) {
82e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return -1;
83e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
84e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    void *szTag;
85e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!((PDFDOC_LPMETADATA)m_pData)->m_pStringMap->Lookup(bsItem, szTag)) {
86e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return -1;
87e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
88e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_ByteString bsTag = (FX_LPCSTR)szTag;
89e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    wsStr = L"";
90e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CXML_Element *pElmntRdf = ((PDFDOC_LPMETADATA)m_pData)->m_pElmntRdf;
91e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!pElmntRdf) {
92e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return -1;
93e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
94e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int nChild = pElmntRdf->CountChildren();
95e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < nChild; i++) {
96e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CXML_Element *pTag = pElmntRdf->GetElement(NULL, FX_BSTRC("Description"), i);
97e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (!pTag) {
98e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
99e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
100e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (bsItem == FX_BSTRC("Title") || bsItem == FX_BSTRC("Subject")) {
101e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CXML_Element *pElmnt = pTag->GetElement(NULL, bsTag);
102e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!pElmnt) {
103e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
104e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
105e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pElmnt = pElmnt->GetElement(NULL, FX_BSTRC("Alt"));
106e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!pElmnt) {
107e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
108e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
109e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pElmnt = pElmnt->GetElement(NULL, FX_BSTRC("li"));
110e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!pElmnt) {
111e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
112e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
113e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            wsStr = pElmnt->GetContent(0);
114e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return wsStr.GetLength();
115e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else if (bsItem == FX_BSTRC("Author")) {
116e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CXML_Element *pElmnt = pTag->GetElement(NULL, bsTag);
117e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!pElmnt) {
118e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
119e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
120e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pElmnt = pElmnt->GetElement(NULL, FX_BSTRC("Seq"));
121e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!pElmnt) {
122e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
123e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
124e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pElmnt = pElmnt->GetElement(NULL, FX_BSTRC("li"));
125e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!pElmnt) {
126e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
127e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
128e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            wsStr = pElmnt->GetContent(0);
129e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return wsStr.GetLength();
130e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else {
131e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CXML_Element *pElmnt = pTag->GetElement(NULL, bsTag);
132e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!pElmnt) {
133e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
134e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
135e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            wsStr = pElmnt->GetContent(0);
136e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return wsStr.GetLength();
137e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
138e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
139e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return -1;
140e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
141e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCXML_Element* CPDF_Metadata::GetRoot() const
142e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
143e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return ((PDFDOC_LPMETADATA)m_pData)->m_pXmlElmnt;
144e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
145e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCXML_Element* CPDF_Metadata::GetRDF() const
146e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
147e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return ((PDFDOC_LPMETADATA)m_pData)->m_pElmntRdf;
148e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
149