1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "../../include/fpdfapi/fpdf_parser.h"
8#include "../../include/fpdfapi/fpdf_page.h"
9#include "../../include/fpdfdoc/fpdf_tagged.h"
10#include "tagged_int.h"
// Upper bound on the recursion depth accepted by the recursive routines in
// this file (AddPageNode, FindAttrDict and the inheritable GetAttr lookup).
// Each of them returns NULL once its level argument exceeds this value.
const int nMaxRecursion = 32;
12static FX_BOOL IsTagged(const CPDF_Document* pDoc)
13{
14    CPDF_Dictionary* pCatalog = pDoc->GetRoot();
15    CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo"));
16    return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked"));
17}
18CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict)
19{
20    if (!IsTagged(pDoc)) {
21        return NULL;
22    }
23    CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
24    pTree->LoadPageTree(pPageDict);
25    return pTree;
26}
27CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc)
28{
29    if (!IsTagged(pDoc)) {
30        return NULL;
31    }
32    CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
33    pTree->LoadDocTree();
34    return pTree;
35}
36CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
37{
38    CPDF_Dictionary* pCatalog = pDoc->GetRoot();
39    m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot"));
40    if (m_pTreeRoot == NULL) {
41        return;
42    }
43    m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap"));
44}
45CPDF_StructTreeImpl::~CPDF_StructTreeImpl()
46{
47    for (int i = 0; i < m_Kids.GetSize(); i ++)
48        if (m_Kids[i]) {
49            m_Kids[i]->Release();
50        }
51}
52void CPDF_StructTreeImpl::LoadDocTree()
53{
54    m_pPage = NULL;
55    if (m_pTreeRoot == NULL) {
56        return;
57    }
58    CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
59    if (pKids == NULL) {
60        return;
61    }
62    if (pKids->GetType() == PDFOBJ_DICTIONARY) {
63        CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids);
64        m_Kids.Add(pStructElementImpl);
65        return;
66    }
67    if (pKids->GetType() != PDFOBJ_ARRAY) {
68        return;
69    }
70    CPDF_Array* pArray = (CPDF_Array*)pKids;
71    for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
72        CPDF_Dictionary* pKid = pArray->GetDict(i);
73        CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, pKid);
74        m_Kids.Add(pStructElementImpl);
75    }
76}
77void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict)
78{
79    m_pPage = pPageDict;
80    if (m_pTreeRoot == NULL) {
81        return;
82    }
83    CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
84    if (pKids == NULL) {
85        return;
86    }
87    FX_DWORD dwKids = 0;
88    if (pKids->GetType() == PDFOBJ_DICTIONARY) {
89        dwKids = 1;
90    } else if (pKids->GetType() == PDFOBJ_ARRAY) {
91        dwKids = ((CPDF_Array*)pKids)->GetCount();
92    } else {
93        return;
94    }
95    FX_DWORD i;
96    m_Kids.SetSize(dwKids);
97    for (i = 0; i < dwKids; i ++) {
98        m_Kids[i] = NULL;
99    }
100    CFX_MapPtrToPtr element_map;
101    CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree"));
102    if (pParentTree == NULL) {
103        return;
104    }
105    CPDF_NumberTree parent_tree(pParentTree);
106    int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1);
107    if (parents_id >= 0) {
108        CPDF_Object* pParents = parent_tree.LookupValue(parents_id);
109        if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) {
110            return;
111        }
112        CPDF_Array* pParentArray = (CPDF_Array*)pParents;
113        for (i = 0; i < pParentArray->GetCount(); i ++) {
114            CPDF_Dictionary* pParent = pParentArray->GetDict(i);
115            if (pParent == NULL) {
116                continue;
117            }
118            AddPageNode(pParent, element_map);
119        }
120    }
121}
122CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel)
123{
124    if (nLevel > nMaxRecursion) {
125        return NULL;
126    }
127    CPDF_StructElementImpl* pElement = NULL;
128    if (map.Lookup(pDict, (FX_LPVOID&)pElement)) {
129        return pElement;
130    }
131    pElement = new CPDF_StructElementImpl(this, NULL, pDict);
132    map.SetAt(pDict, pElement);
133    CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P"));
134    if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) {
135        if (!AddTopLevelNode(pDict, pElement)) {
136            pElement->Release();
137            map.RemoveKey(pDict);
138        }
139    } else {
140        CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1);
141        FX_BOOL bSave = FALSE;
142        for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) {
143            if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
144                continue;
145            }
146            if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
147                continue;
148            }
149            pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
150            bSave = TRUE;
151        }
152        if (!bSave) {
153            pElement->Release();
154            map.RemoveKey(pDict);
155        }
156    }
157    return pElement;
158}
159FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement)
160{
161    CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
162    if (!pObj) {
163        return FALSE;
164    }
165    if (pObj->GetType() == PDFOBJ_DICTIONARY) {
166        if (pObj->GetObjNum() == pDict->GetObjNum()) {
167            if (m_Kids[0]) {
168                m_Kids[0]->Release();
169            }
170            m_Kids[0] = pElement->Retain();
171        } else {
172            return FALSE;
173        }
174    }
175    if (pObj->GetType() == PDFOBJ_ARRAY) {
176        CPDF_Array* pTopKids = (CPDF_Array*)pObj;
177        FX_DWORD i;
178        FX_BOOL bSave = FALSE;
179        for (i = 0; i < pTopKids->GetCount(); i ++) {
180            CPDF_Object* pKidRef = pTopKids->GetElement(i);
181            if (pKidRef == NULL || pKidRef->GetType() != PDFOBJ_REFERENCE) {
182                continue;
183            }
184            if (((CPDF_Reference*) pKidRef)->GetRefObjNum() != pDict->GetObjNum()) {
185                continue;
186            }
187            if (m_Kids[i]) {
188                m_Kids[i]->Release();
189            }
190            m_Kids[i] = pElement->Retain();
191            bSave = TRUE;
192        }
193        if (!bSave) {
194            return FALSE;
195        }
196    }
197    return TRUE;
198}
199CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict)
200    : m_RefCount(0)
201{
202    m_pTree = pTree;
203    m_pDict = pDict;
204    m_Type = pDict->GetString(FX_BSTRC("S"));
205    if (pTree->m_pRoleMap) {
206        CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
207        if (!mapped.IsEmpty()) {
208            m_Type = mapped;
209        }
210    }
211    m_pParent = pParent;
212    LoadKids(pDict);
213}
214CPDF_StructElementImpl::~CPDF_StructElementImpl()
215{
216    for (int i = 0; i < m_Kids.GetSize(); i ++) {
217        if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) {
218            ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release();
219        }
220    }
221}
222CPDF_StructElementImpl* CPDF_StructElementImpl::Retain()
223{
224    m_RefCount++;
225    return this;
226}
227void CPDF_StructElementImpl::Release()
228{
229    if(--m_RefCount < 1) {
230        delete this;
231    }
232}
233void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict)
234{
235    CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg"));
236    FX_DWORD PageObjNum = 0;
237    if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
238        PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
239    }
240    CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K"));
241    if (pKids == NULL) {
242        return;
243    }
244    if (pKids->GetType() == PDFOBJ_ARRAY) {
245        CPDF_Array* pArray = (CPDF_Array*)pKids;
246        m_Kids.SetSize(pArray->GetCount());
247        for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
248            CPDF_Object* pKid = pArray->GetElementValue(i);
249            LoadKid(PageObjNum, pKid, &m_Kids[i]);
250        }
251    } else {
252        m_Kids.SetSize(1);
253        LoadKid(PageObjNum, pKids, &m_Kids[0]);
254    }
255}
256void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid)
257{
258    pKid->m_Type = CPDF_StructKid::Invalid;
259    if (pKidObj == NULL) {
260        return;
261    }
262    if (pKidObj->GetType() == PDFOBJ_NUMBER) {
263        if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
264            return;
265        }
266        pKid->m_Type = CPDF_StructKid::PageContent;
267        pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
268        pKid->m_PageContent.m_PageObjNum = PageObjNum;
269        return;
270    }
271    if (pKidObj->GetType() != PDFOBJ_DICTIONARY) {
272        return;
273    }
274    CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj;
275    CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg"));
276    if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) {
277        PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum();
278    }
279    CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type"));
280    if (type == FX_BSTRC("MCR")) {
281        if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
282            return;
283        }
284        pKid->m_Type = CPDF_StructKid::StreamContent;
285        CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm"));
286        if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) {
287            pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum();
288        } else {
289            pKid->m_StreamContent.m_RefObjNum = 0;
290        }
291        pKid->m_StreamContent.m_PageObjNum = PageObjNum;
292        pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID"));
293    } else if (type == FX_BSTRC("OBJR")) {
294        if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
295            return;
296        }
297        pKid->m_Type = CPDF_StructKid::Object;
298        CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj"));
299        if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
300            pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
301        } else {
302            pKid->m_Object.m_RefObjNum = 0;
303        }
304        pKid->m_Object.m_PageObjNum = PageObjNum;
305    } else {
306        pKid->m_Type = CPDF_StructKid::Element;
307        pKid->m_Element.m_pDict = pKidDict;
308        if (m_pTree->m_pPage == NULL) {
309            pKid->m_Element.m_pElement = new CPDF_StructElementImpl(m_pTree, this, pKidDict);
310        } else {
311            pKid->m_Element.m_pElement = NULL;
312        }
313    }
314}
315static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F)
316{
317    if (nLevel > nMaxRecursion) {
318        return NULL;
319    }
320    if (pAttrs == NULL) {
321        return NULL;
322    }
323    CPDF_Dictionary* pDict = NULL;
324    if (pAttrs->GetType() == PDFOBJ_DICTIONARY) {
325        pDict = (CPDF_Dictionary*)pAttrs;
326    } else if (pAttrs->GetType() == PDFOBJ_STREAM) {
327        pDict = ((CPDF_Stream*)pAttrs)->GetDict();
328    } else if (pAttrs->GetType() == PDFOBJ_ARRAY) {
329        CPDF_Array* pArray = (CPDF_Array*)pAttrs;
330        for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
331            CPDF_Object* pElement = pArray->GetElementValue(i);
332            pDict = FindAttrDict(pElement, owner, nLevel + 1);
333            if (pDict) {
334                return pDict;
335            }
336        }
337    }
338    if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) {
339        return pDict;
340    }
341    return NULL;
342}
343CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel)
344{
345    if (fLevel > nMaxRecursion) {
346        return NULL;
347    }
348    if (bInheritable) {
349        CPDF_Object* pAttr = GetAttr(owner, name, FALSE);
350        if (pAttr) {
351            return pAttr;
352        }
353        if (m_pParent == NULL) {
354            return NULL;
355        }
356        return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
357    }
358    CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A"));
359    if (pA) {
360        CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
361        if (pAttrDict) {
362            CPDF_Object* pAttr = pAttrDict->GetElementValue(name);
363            if (pAttr) {
364                return pAttr;
365            }
366        }
367    }
368    CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C"));
369    if (pC == NULL) {
370        return NULL;
371    }
372    CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap"));
373    if (pClassMap == NULL) {
374        return NULL;
375    }
376    if (pC->GetType() == PDFOBJ_ARRAY) {
377        CPDF_Array* pArray = (CPDF_Array*)pC;
378        for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
379            CFX_ByteString class_name = pArray->GetString(i);
380            CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
381            if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
382                return pClassDict->GetElementValue(name);
383            }
384        }
385        return NULL;
386    }
387    CFX_ByteString class_name = pC->GetString();
388    CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
389    if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
390        return pClassDict->GetElementValue(name);
391    }
392    return NULL;
393}
394CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex)
395{
396    CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
397    if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) {
398        return pAttr;
399    }
400    CPDF_Array* pArray = (CPDF_Array*)pAttr;
401    if (subindex >= (int)pArray->GetCount()) {
402        return pAttr;
403    }
404    return pArray->GetElementValue(subindex);
405}
406CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex)
407{
408    CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
409    if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) {
410        return default_value;
411    }
412    return pAttr->GetString();
413}
414FX_ARGB	CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex)
415{
416    CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
417    if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) {
418        return default_value;
419    }
420    CPDF_Array* pArray = (CPDF_Array*)pAttr;
421    return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255);
422}
423FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex)
424{
425    CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
426    if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
427        return default_value;
428    }
429    return pAttr->GetNumber();
430}
431int	CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex)
432{
433    CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
434    if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
435        return default_value;
436    }
437    return pAttr->GetInteger();
438}
439