1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/include/fpdfapi/fpdf_page.h"
8#include "core/include/fpdfapi/fpdf_parser.h"
9#include "core/include/fpdfdoc/fpdf_tagged.h"
10#include "tagged_int.h"
11
// Depth limit shared by the recursive routines in this file (AddPageNode,
// FindAttrDict and GetAttr); each of them gives up once its level exceeds it.
static const int nMaxRecursion = 32;
13static FX_BOOL IsTagged(const CPDF_Document* pDoc) {
14  CPDF_Dictionary* pCatalog = pDoc->GetRoot();
15  CPDF_Dictionary* pMarkInfo = pCatalog->GetDict("MarkInfo");
16  return pMarkInfo != NULL && pMarkInfo->GetInteger("Marked");
17}
18CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc,
19                                           const CPDF_Dictionary* pPageDict) {
20  if (!IsTagged(pDoc)) {
21    return NULL;
22  }
23  CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
24  pTree->LoadPageTree(pPageDict);
25  return pTree;
26}
27CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc) {
28  if (!IsTagged(pDoc)) {
29    return NULL;
30  }
31  CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
32  pTree->LoadDocTree();
33  return pTree;
34}
35CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc) {
36  CPDF_Dictionary* pCatalog = pDoc->GetRoot();
37  m_pTreeRoot = pCatalog->GetDict("StructTreeRoot");
38  if (!m_pTreeRoot) {
39    return;
40  }
41  m_pRoleMap = m_pTreeRoot->GetDict("RoleMap");
42}
43CPDF_StructTreeImpl::~CPDF_StructTreeImpl() {
44  for (int i = 0; i < m_Kids.GetSize(); i++)
45    if (m_Kids[i]) {
46      m_Kids[i]->Release();
47    }
48}
49void CPDF_StructTreeImpl::LoadDocTree() {
50  m_pPage = nullptr;
51  if (!m_pTreeRoot)
52    return;
53
54  CPDF_Object* pKids = m_pTreeRoot->GetElementValue("K");
55  if (!pKids)
56    return;
57  if (CPDF_Dictionary* pDict = pKids->AsDictionary()) {
58    CPDF_StructElementImpl* pStructElementImpl =
59        new CPDF_StructElementImpl(this, nullptr, pDict);
60    m_Kids.Add(pStructElementImpl);
61    return;
62  }
63  CPDF_Array* pArray = pKids->AsArray();
64  if (!pArray)
65    return;
66
67  for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
68    CPDF_Dictionary* pKid = pArray->GetDict(i);
69    CPDF_StructElementImpl* pStructElementImpl =
70        new CPDF_StructElementImpl(this, nullptr, pKid);
71    m_Kids.Add(pStructElementImpl);
72  }
73}
74void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict) {
75  m_pPage = pPageDict;
76  if (!m_pTreeRoot)
77    return;
78
79  CPDF_Object* pKids = m_pTreeRoot->GetElementValue("K");
80  if (!pKids)
81    return;
82
83  FX_DWORD dwKids = 0;
84  if (pKids->IsDictionary())
85    dwKids = 1;
86  else if (CPDF_Array* pArray = pKids->AsArray())
87    dwKids = pArray->GetCount();
88  else
89    return;
90
91  FX_DWORD i;
92  m_Kids.SetSize(dwKids);
93  for (i = 0; i < dwKids; i++) {
94    m_Kids[i] = NULL;
95  }
96  CFX_MapPtrToPtr element_map;
97  CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict("ParentTree");
98  if (!pParentTree) {
99    return;
100  }
101  CPDF_NumberTree parent_tree(pParentTree);
102  int parents_id = pPageDict->GetInteger("StructParents", -1);
103  if (parents_id >= 0) {
104    CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id));
105    if (!pParentArray)
106      return;
107
108    for (i = 0; i < pParentArray->GetCount(); i++) {
109      CPDF_Dictionary* pParent = pParentArray->GetDict(i);
110      if (!pParent) {
111        continue;
112      }
113      AddPageNode(pParent, element_map);
114    }
115  }
116}
117CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict,
118                                                         CFX_MapPtrToPtr& map,
119                                                         int nLevel) {
120  if (nLevel > nMaxRecursion) {
121    return NULL;
122  }
123  CPDF_StructElementImpl* pElement = NULL;
124  if (map.Lookup(pDict, (void*&)pElement)) {
125    return pElement;
126  }
127  pElement = new CPDF_StructElementImpl(this, NULL, pDict);
128  map.SetAt(pDict, pElement);
129  CPDF_Dictionary* pParent = pDict->GetDict("P");
130  if (!pParent || pParent->GetString("Type") == "StructTreeRoot") {
131    if (!AddTopLevelNode(pDict, pElement)) {
132      pElement->Release();
133      map.RemoveKey(pDict);
134    }
135  } else {
136    CPDF_StructElementImpl* pParentElement =
137        AddPageNode(pParent, map, nLevel + 1);
138    FX_BOOL bSave = FALSE;
139    for (int i = 0; i < pParentElement->m_Kids.GetSize(); i++) {
140      if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
141        continue;
142      }
143      if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
144        continue;
145      }
146      pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
147      bSave = TRUE;
148    }
149    if (!bSave) {
150      pElement->Release();
151      map.RemoveKey(pDict);
152    }
153  }
154  return pElement;
155}
156FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict,
157                                             CPDF_StructElementImpl* pElement) {
158  CPDF_Object* pObj = m_pTreeRoot->GetElementValue("K");
159  if (!pObj) {
160    return FALSE;
161  }
162  if (pObj->IsDictionary()) {
163    if (pObj->GetObjNum() == pDict->GetObjNum()) {
164      if (m_Kids[0]) {
165        m_Kids[0]->Release();
166      }
167      m_Kids[0] = pElement->Retain();
168    } else {
169      return FALSE;
170    }
171  }
172  if (CPDF_Array* pTopKids = pObj->AsArray()) {
173    FX_DWORD i;
174    FX_BOOL bSave = FALSE;
175    for (i = 0; i < pTopKids->GetCount(); i++) {
176      CPDF_Reference* pKidRef = ToReference(pTopKids->GetElement(i));
177      if (!pKidRef)
178        continue;
179      if (pKidRef->GetRefObjNum() != pDict->GetObjNum())
180        continue;
181
182      if (m_Kids[i])
183        m_Kids[i]->Release();
184      m_Kids[i] = pElement->Retain();
185      bSave = TRUE;
186    }
187    if (!bSave)
188      return FALSE;
189  }
190  return TRUE;
191}
192CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree,
193                                               CPDF_StructElementImpl* pParent,
194                                               CPDF_Dictionary* pDict)
195    : m_RefCount(0) {
196  m_pTree = pTree;
197  m_pDict = pDict;
198  m_Type = pDict->GetString("S");
199  if (pTree->m_pRoleMap) {
200    CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
201    if (!mapped.IsEmpty()) {
202      m_Type = mapped;
203    }
204  }
205  m_pParent = pParent;
206  LoadKids(pDict);
207}
208CPDF_StructElementImpl::~CPDF_StructElementImpl() {
209  for (int i = 0; i < m_Kids.GetSize(); i++) {
210    if (m_Kids[i].m_Type == CPDF_StructKid::Element &&
211        m_Kids[i].m_Element.m_pElement) {
212      ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release();
213    }
214  }
215}
216CPDF_StructElementImpl* CPDF_StructElementImpl::Retain() {
217  m_RefCount++;
218  return this;
219}
220void CPDF_StructElementImpl::Release() {
221  if (--m_RefCount < 1) {
222    delete this;
223  }
224}
225void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict) {
226  CPDF_Object* pObj = pDict->GetElement("Pg");
227  FX_DWORD PageObjNum = 0;
228  if (CPDF_Reference* pRef = ToReference(pObj))
229    PageObjNum = pRef->GetRefObjNum();
230
231  CPDF_Object* pKids = pDict->GetElementValue("K");
232  if (!pKids)
233    return;
234
235  if (CPDF_Array* pArray = pKids->AsArray()) {
236    m_Kids.SetSize(pArray->GetCount());
237    for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
238      CPDF_Object* pKid = pArray->GetElementValue(i);
239      LoadKid(PageObjNum, pKid, &m_Kids[i]);
240    }
241  } else {
242    m_Kids.SetSize(1);
243    LoadKid(PageObjNum, pKids, &m_Kids[0]);
244  }
245}
246void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum,
247                                     CPDF_Object* pKidObj,
248                                     CPDF_StructKid* pKid) {
249  pKid->m_Type = CPDF_StructKid::Invalid;
250  if (!pKidObj)
251    return;
252
253  if (pKidObj->IsNumber()) {
254    if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
255      return;
256    }
257    pKid->m_Type = CPDF_StructKid::PageContent;
258    pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
259    pKid->m_PageContent.m_PageObjNum = PageObjNum;
260    return;
261  }
262
263  CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
264  if (!pKidDict)
265    return;
266
267  if (CPDF_Reference* pRef = ToReference(pKidDict->GetElement("Pg")))
268    PageObjNum = pRef->GetRefObjNum();
269
270  CFX_ByteString type = pKidDict->GetString("Type");
271  if (type == "MCR") {
272    if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
273      return;
274    }
275    pKid->m_Type = CPDF_StructKid::StreamContent;
276    if (CPDF_Reference* pRef = ToReference(pKidDict->GetElement("Stm"))) {
277      pKid->m_StreamContent.m_RefObjNum = pRef->GetRefObjNum();
278    } else {
279      pKid->m_StreamContent.m_RefObjNum = 0;
280    }
281    pKid->m_StreamContent.m_PageObjNum = PageObjNum;
282    pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger("MCID");
283  } else if (type == "OBJR") {
284    if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
285      return;
286    }
287    pKid->m_Type = CPDF_StructKid::Object;
288    if (CPDF_Reference* pObj = ToReference(pKidDict->GetElement("Obj"))) {
289      pKid->m_Object.m_RefObjNum = pObj->GetRefObjNum();
290    } else {
291      pKid->m_Object.m_RefObjNum = 0;
292    }
293    pKid->m_Object.m_PageObjNum = PageObjNum;
294  } else {
295    pKid->m_Type = CPDF_StructKid::Element;
296    pKid->m_Element.m_pDict = pKidDict;
297    if (!m_pTree->m_pPage) {
298      pKid->m_Element.m_pElement =
299          new CPDF_StructElementImpl(m_pTree, this, pKidDict);
300    } else {
301      pKid->m_Element.m_pElement = NULL;
302    }
303  }
304}
305static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs,
306                                     const CFX_ByteStringC& owner,
307                                     FX_FLOAT nLevel = 0.0F) {
308  if (nLevel > nMaxRecursion)
309    return nullptr;
310  if (!pAttrs)
311    return nullptr;
312
313  CPDF_Dictionary* pDict = nullptr;
314  if (pAttrs->IsDictionary()) {
315    pDict = pAttrs->AsDictionary();
316  } else if (CPDF_Stream* pStream = pAttrs->AsStream()) {
317    pDict = pStream->GetDict();
318  } else if (CPDF_Array* pArray = pAttrs->AsArray()) {
319    for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
320      CPDF_Object* pElement = pArray->GetElementValue(i);
321      pDict = FindAttrDict(pElement, owner, nLevel + 1);
322      if (pDict)
323        return pDict;
324    }
325  }
326  if (pDict && pDict->GetString("O") == owner)
327    return pDict;
328  return nullptr;
329}
330CPDF_Object* CPDF_StructElementImpl::GetAttr(const CFX_ByteStringC& owner,
331                                             const CFX_ByteStringC& name,
332                                             FX_BOOL bInheritable,
333                                             FX_FLOAT fLevel) {
334  if (fLevel > nMaxRecursion) {
335    return NULL;
336  }
337  if (bInheritable) {
338    CPDF_Object* pAttr = GetAttr(owner, name, FALSE);
339    if (pAttr) {
340      return pAttr;
341    }
342    if (!m_pParent) {
343      return NULL;
344    }
345    return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
346  }
347  CPDF_Object* pA = m_pDict->GetElementValue("A");
348  if (pA) {
349    CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
350    if (pAttrDict) {
351      CPDF_Object* pAttr = pAttrDict->GetElementValue(name);
352      if (pAttr) {
353        return pAttr;
354      }
355    }
356  }
357  CPDF_Object* pC = m_pDict->GetElementValue("C");
358  if (!pC)
359    return nullptr;
360
361  CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict("ClassMap");
362  if (!pClassMap)
363    return nullptr;
364
365  if (CPDF_Array* pArray = pC->AsArray()) {
366    for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
367      CFX_ByteString class_name = pArray->GetString(i);
368      CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
369      if (pClassDict && pClassDict->GetString("O") == owner)
370        return pClassDict->GetElementValue(name);
371    }
372    return nullptr;
373  }
374  CFX_ByteString class_name = pC->GetString();
375  CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
376  if (pClassDict && pClassDict->GetString("O") == owner)
377    return pClassDict->GetElementValue(name);
378  return nullptr;
379}
380CPDF_Object* CPDF_StructElementImpl::GetAttr(const CFX_ByteStringC& owner,
381                                             const CFX_ByteStringC& name,
382                                             FX_BOOL bInheritable,
383                                             int subindex) {
384  CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
385  CPDF_Array* pArray = ToArray(pAttr);
386  if (!pArray || subindex == -1)
387    return pAttr;
388
389  if (subindex >= static_cast<int>(pArray->GetCount()))
390    return pAttr;
391  return pArray->GetElementValue(subindex);
392}
393CFX_ByteString CPDF_StructElementImpl::GetName(
394    const CFX_ByteStringC& owner,
395    const CFX_ByteStringC& name,
396    const CFX_ByteStringC& default_value,
397    FX_BOOL bInheritable,
398    int subindex) {
399  CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
400  if (ToName(pAttr))
401    return pAttr->GetString();
402  return default_value;
403}
404
405FX_ARGB CPDF_StructElementImpl::GetColor(const CFX_ByteStringC& owner,
406                                         const CFX_ByteStringC& name,
407                                         FX_ARGB default_value,
408                                         FX_BOOL bInheritable,
409                                         int subindex) {
410  CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex));
411  if (!pArray)
412    return default_value;
413  return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) |
414         ((int)(pArray->GetNumber(1) * 255) << 8) |
415         (int)(pArray->GetNumber(2) * 255);
416}
417FX_FLOAT CPDF_StructElementImpl::GetNumber(const CFX_ByteStringC& owner,
418                                           const CFX_ByteStringC& name,
419                                           FX_FLOAT default_value,
420                                           FX_BOOL bInheritable,
421                                           int subindex) {
422  CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
423  return ToNumber(pAttr) ? pAttr->GetNumber() : default_value;
424}
425int CPDF_StructElementImpl::GetInteger(const CFX_ByteStringC& owner,
426                                       const CFX_ByteStringC& name,
427                                       int default_value,
428                                       FX_BOOL bInheritable,
429                                       int subindex) {
430  CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
431  return ToNumber(pAttr) ? pAttr->GetInteger() : default_value;
432}
433