1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#include "../../include/fpdfapi/fpdf_parser.h" 8#include "../../include/fpdfapi/fpdf_page.h" 9#include "../../include/fpdfdoc/fpdf_tagged.h" 10#include "tagged_int.h" 11const int nMaxRecursion = 32; 12static FX_BOOL IsTagged(const CPDF_Document* pDoc) 13{ 14 CPDF_Dictionary* pCatalog = pDoc->GetRoot(); 15 CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo")); 16 return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked")); 17} 18CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict) 19{ 20 if (!IsTagged(pDoc)) { 21 return NULL; 22 } 23 CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc); 24 pTree->LoadPageTree(pPageDict); 25 return pTree; 26} 27CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc) 28{ 29 if (!IsTagged(pDoc)) { 30 return NULL; 31 } 32 CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc); 33 pTree->LoadDocTree(); 34 return pTree; 35} 36CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc) 37{ 38 CPDF_Dictionary* pCatalog = pDoc->GetRoot(); 39 m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot")); 40 if (m_pTreeRoot == NULL) { 41 return; 42 } 43 m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap")); 44} 45CPDF_StructTreeImpl::~CPDF_StructTreeImpl() 46{ 47 for (int i = 0; i < m_Kids.GetSize(); i ++) 48 if (m_Kids[i]) { 49 m_Kids[i]->Release(); 50 } 51} 52void CPDF_StructTreeImpl::LoadDocTree() 53{ 54 m_pPage = NULL; 55 if (m_pTreeRoot == NULL) { 56 return; 57 } 58 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K")); 59 if (pKids == NULL) { 60 return; 61 } 62 if (pKids->GetType() == PDFOBJ_DICTIONARY) { 63 CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids); 64 m_Kids.Add(pStructElementImpl); 65 return; 66 } 67 if (pKids->GetType() != PDFOBJ_ARRAY) { 68 return; 69 } 70 CPDF_Array* pArray = (CPDF_Array*)pKids; 71 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) { 72 CPDF_Dictionary* pKid = pArray->GetDict(i); 73 CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, pKid); 74 m_Kids.Add(pStructElementImpl); 75 } 76} 77void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict) 78{ 79 m_pPage = pPageDict; 80 if (m_pTreeRoot == NULL) { 81 return; 82 } 83 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K")); 84 if (pKids == NULL) { 85 return; 86 } 87 FX_DWORD dwKids = 0; 88 if (pKids->GetType() == PDFOBJ_DICTIONARY) { 89 dwKids = 1; 90 } else if (pKids->GetType() == PDFOBJ_ARRAY) { 91 dwKids = ((CPDF_Array*)pKids)->GetCount(); 92 } else { 93 return; 94 } 95 FX_DWORD i; 96 m_Kids.SetSize(dwKids); 97 for (i = 0; i < dwKids; i ++) { 98 m_Kids[i] = NULL; 99 } 100 CFX_MapPtrToPtr element_map; 101 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree")); 102 if (pParentTree == NULL) { 103 return; 104 } 105 CPDF_NumberTree parent_tree(pParentTree); 106 int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1); 107 if (parents_id >= 0) { 108 CPDF_Object* pParents = parent_tree.LookupValue(parents_id); 109 if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) { 110 return; 111 } 112 CPDF_Array* pParentArray = (CPDF_Array*)pParents; 113 for (i = 0; i < pParentArray->GetCount(); i ++) { 114 CPDF_Dictionary* pParent = pParentArray->GetDict(i); 115 if (pParent == NULL) { 116 continue; 117 } 118 AddPageNode(pParent, element_map); 119 } 120 } 121} 122CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel) 123{ 124 if (nLevel > nMaxRecursion) { 125 return NULL; 126 } 127 CPDF_StructElementImpl* pElement = NULL; 128 if (map.Lookup(pDict, (FX_LPVOID&)pElement)) { 129 return pElement; 130 } 131 pElement = new CPDF_StructElementImpl(this, NULL, pDict); 132 map.SetAt(pDict, pElement); 133 CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P")); 134 if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) { 135 if (!AddTopLevelNode(pDict, pElement)) { 136 pElement->Release(); 137 map.RemoveKey(pDict); 138 } 139 } else { 140 CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1); 141 FX_BOOL bSave = FALSE; 142 for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) { 143 if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) { 144 continue; 145 } 146 if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) { 147 continue; 148 } 149 pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain(); 150 bSave = TRUE; 151 } 152 if (!bSave) { 153 pElement->Release(); 154 map.RemoveKey(pDict); 155 } 156 } 157 return pElement; 158} 159FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement) 160{ 161 CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K")); 162 if (!pObj) { 163 return FALSE; 164 } 165 if (pObj->GetType() == PDFOBJ_DICTIONARY) { 166 if (pObj->GetObjNum() == pDict->GetObjNum()) { 167 if (m_Kids[0]) { 168 m_Kids[0]->Release(); 169 } 170 m_Kids[0] = pElement->Retain(); 171 } else { 172 return FALSE; 173 } 174 } 175 if (pObj->GetType() == PDFOBJ_ARRAY) { 176 CPDF_Array* pTopKids = (CPDF_Array*)pObj; 177 FX_DWORD i; 178 FX_BOOL bSave = FALSE; 179 for (i = 0; i < pTopKids->GetCount(); i ++) { 180 CPDF_Object* pKidRef = pTopKids->GetElement(i); 181 if (pKidRef == NULL || pKidRef->GetType() != PDFOBJ_REFERENCE) { 182 continue; 183 } 184 if (((CPDF_Reference*) pKidRef)->GetRefObjNum() != pDict->GetObjNum()) { 185 continue; 186 } 187 if (m_Kids[i]) { 188 m_Kids[i]->Release(); 189 } 190 m_Kids[i] = pElement->Retain(); 191 bSave = TRUE; 192 } 193 if (!bSave) { 194 return FALSE; 195 } 196 } 197 return TRUE; 198} 199CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict) 200 : m_RefCount(0) 201{ 202 m_pTree = pTree; 203 m_pDict = pDict; 204 m_Type = pDict->GetString(FX_BSTRC("S")); 205 if (pTree->m_pRoleMap) { 206 CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type); 207 if (!mapped.IsEmpty()) { 208 m_Type = mapped; 209 } 210 } 211 m_pParent = pParent; 212 LoadKids(pDict); 213} 214CPDF_StructElementImpl::~CPDF_StructElementImpl() 215{ 216 for (int i = 0; i < m_Kids.GetSize(); i ++) { 217 if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) { 218 ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release(); 219 } 220 } 221} 222CPDF_StructElementImpl* CPDF_StructElementImpl::Retain() 223{ 224 m_RefCount++; 225 return this; 226} 227void CPDF_StructElementImpl::Release() 228{ 229 if(--m_RefCount < 1) { 230 delete this; 231 } 232} 233void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict) 234{ 235 CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg")); 236 FX_DWORD PageObjNum = 0; 237 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) { 238 PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum(); 239 } 240 CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K")); 241 if (pKids == NULL) { 242 return; 243 } 244 if (pKids->GetType() == PDFOBJ_ARRAY) { 245 CPDF_Array* pArray = (CPDF_Array*)pKids; 246 m_Kids.SetSize(pArray->GetCount()); 247 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) { 248 CPDF_Object* pKid = pArray->GetElementValue(i); 249 LoadKid(PageObjNum, pKid, &m_Kids[i]); 250 } 251 } else { 252 m_Kids.SetSize(1); 253 LoadKid(PageObjNum, pKids, &m_Kids[0]); 254 } 255} 256void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid) 257{ 258 pKid->m_Type = CPDF_StructKid::Invalid; 259 if (pKidObj == NULL) { 260 return; 261 } 262 if (pKidObj->GetType() == PDFOBJ_NUMBER) { 263 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 264 return; 265 } 266 pKid->m_Type = CPDF_StructKid::PageContent; 267 pKid->m_PageContent.m_ContentId = pKidObj->GetInteger(); 268 pKid->m_PageContent.m_PageObjNum = PageObjNum; 269 return; 270 } 271 if (pKidObj->GetType() != PDFOBJ_DICTIONARY) { 272 return; 273 } 274 CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj; 275 CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg")); 276 if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) { 277 PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum(); 278 } 279 CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type")); 280 if (type == FX_BSTRC("MCR")) { 281 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 282 return; 283 } 284 pKid->m_Type = CPDF_StructKid::StreamContent; 285 CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm")); 286 if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) { 287 pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum(); 288 } else { 289 pKid->m_StreamContent.m_RefObjNum = 0; 290 } 291 pKid->m_StreamContent.m_PageObjNum = PageObjNum; 292 pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID")); 293 } else if (type == FX_BSTRC("OBJR")) { 294 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 295 return; 296 } 297 pKid->m_Type = CPDF_StructKid::Object; 298 CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj")); 299 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) { 300 pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum(); 301 } else { 302 pKid->m_Object.m_RefObjNum = 0; 303 } 304 pKid->m_Object.m_PageObjNum = PageObjNum; 305 } else { 306 pKid->m_Type = CPDF_StructKid::Element; 307 pKid->m_Element.m_pDict = pKidDict; 308 if (m_pTree->m_pPage == NULL) { 309 pKid->m_Element.m_pElement = new CPDF_StructElementImpl(m_pTree, this, pKidDict); 310 } else { 311 pKid->m_Element.m_pElement = NULL; 312 } 313 } 314} 315static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F) 316{ 317 if (nLevel > nMaxRecursion) { 318 return NULL; 319 } 320 if (pAttrs == NULL) { 321 return NULL; 322 } 323 CPDF_Dictionary* pDict = NULL; 324 if (pAttrs->GetType() == PDFOBJ_DICTIONARY) { 325 pDict = (CPDF_Dictionary*)pAttrs; 326 } else if (pAttrs->GetType() == PDFOBJ_STREAM) { 327 pDict = ((CPDF_Stream*)pAttrs)->GetDict(); 328 } else if (pAttrs->GetType() == PDFOBJ_ARRAY) { 329 CPDF_Array* pArray = (CPDF_Array*)pAttrs; 330 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) { 331 CPDF_Object* pElement = pArray->GetElementValue(i); 332 pDict = FindAttrDict(pElement, owner, nLevel + 1); 333 if (pDict) { 334 return pDict; 335 } 336 } 337 } 338 if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) { 339 return pDict; 340 } 341 return NULL; 342} 343CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel) 344{ 345 if (fLevel > nMaxRecursion) { 346 return NULL; 347 } 348 if (bInheritable) { 349 CPDF_Object* pAttr = GetAttr(owner, name, FALSE); 350 if (pAttr) { 351 return pAttr; 352 } 353 if (m_pParent == NULL) { 354 return NULL; 355 } 356 return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1); 357 } 358 CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A")); 359 if (pA) { 360 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner); 361 if (pAttrDict) { 362 CPDF_Object* pAttr = pAttrDict->GetElementValue(name); 363 if (pAttr) { 364 return pAttr; 365 } 366 } 367 } 368 CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C")); 369 if (pC == NULL) { 370 return NULL; 371 } 372 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap")); 373 if (pClassMap == NULL) { 374 return NULL; 375 } 376 if (pC->GetType() == PDFOBJ_ARRAY) { 377 CPDF_Array* pArray = (CPDF_Array*)pC; 378 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) { 379 CFX_ByteString class_name = pArray->GetString(i); 380 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name); 381 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) { 382 return pClassDict->GetElementValue(name); 383 } 384 } 385 return NULL; 386 } 387 CFX_ByteString class_name = pC->GetString(); 388 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name); 389 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) { 390 return pClassDict->GetElementValue(name); 391 } 392 return NULL; 393} 394CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex) 395{ 396 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable); 397 if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) { 398 return pAttr; 399 } 400 CPDF_Array* pArray = (CPDF_Array*)pAttr; 401 if (subindex >= (int)pArray->GetCount()) { 402 return pAttr; 403 } 404 return pArray->GetElementValue(subindex); 405} 406CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex) 407{ 408 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 409 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) { 410 return default_value; 411 } 412 return pAttr->GetString(); 413} 414FX_ARGB CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex) 415{ 416 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 417 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) { 418 return default_value; 419 } 420 CPDF_Array* pArray = (CPDF_Array*)pAttr; 421 return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255); 422} 423FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex) 424{ 425 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 426 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) { 427 return default_value; 428 } 429 return pAttr->GetNumber(); 430} 431int CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex) 432{ 433 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 434 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) { 435 return default_value; 436 } 437 return pAttr->GetInteger(); 438} 439