1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#include "core/include/fpdfapi/fpdf_page.h" 8#include "core/include/fpdfapi/fpdf_parser.h" 9#include "core/include/fpdfdoc/fpdf_tagged.h" 10#include "tagged_int.h" 11 12const int nMaxRecursion = 32; 13static FX_BOOL IsTagged(const CPDF_Document* pDoc) { 14 CPDF_Dictionary* pCatalog = pDoc->GetRoot(); 15 CPDF_Dictionary* pMarkInfo = pCatalog->GetDict("MarkInfo"); 16 return pMarkInfo != NULL && pMarkInfo->GetInteger("Marked"); 17} 18CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, 19 const CPDF_Dictionary* pPageDict) { 20 if (!IsTagged(pDoc)) { 21 return NULL; 22 } 23 CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc); 24 pTree->LoadPageTree(pPageDict); 25 return pTree; 26} 27CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc) { 28 if (!IsTagged(pDoc)) { 29 return NULL; 30 } 31 CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc); 32 pTree->LoadDocTree(); 33 return pTree; 34} 35CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc) { 36 CPDF_Dictionary* pCatalog = pDoc->GetRoot(); 37 m_pTreeRoot = pCatalog->GetDict("StructTreeRoot"); 38 if (!m_pTreeRoot) { 39 return; 40 } 41 m_pRoleMap = m_pTreeRoot->GetDict("RoleMap"); 42} 43CPDF_StructTreeImpl::~CPDF_StructTreeImpl() { 44 for (int i = 0; i < m_Kids.GetSize(); i++) 45 if (m_Kids[i]) { 46 m_Kids[i]->Release(); 47 } 48} 49void CPDF_StructTreeImpl::LoadDocTree() { 50 m_pPage = nullptr; 51 if (!m_pTreeRoot) 52 return; 53 54 CPDF_Object* pKids = m_pTreeRoot->GetElementValue("K"); 55 if (!pKids) 56 return; 57 if (CPDF_Dictionary* pDict = pKids->AsDictionary()) { 58 CPDF_StructElementImpl* pStructElementImpl = 59 new CPDF_StructElementImpl(this, nullptr, pDict); 60 m_Kids.Add(pStructElementImpl); 61 return; 62 } 63 CPDF_Array* pArray = pKids->AsArray(); 64 if (!pArray) 65 return; 66 67 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { 68 CPDF_Dictionary* pKid = pArray->GetDict(i); 69 CPDF_StructElementImpl* pStructElementImpl = 70 new CPDF_StructElementImpl(this, nullptr, pKid); 71 m_Kids.Add(pStructElementImpl); 72 } 73} 74void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict) { 75 m_pPage = pPageDict; 76 if (!m_pTreeRoot) 77 return; 78 79 CPDF_Object* pKids = m_pTreeRoot->GetElementValue("K"); 80 if (!pKids) 81 return; 82 83 FX_DWORD dwKids = 0; 84 if (pKids->IsDictionary()) 85 dwKids = 1; 86 else if (CPDF_Array* pArray = pKids->AsArray()) 87 dwKids = pArray->GetCount(); 88 else 89 return; 90 91 FX_DWORD i; 92 m_Kids.SetSize(dwKids); 93 for (i = 0; i < dwKids; i++) { 94 m_Kids[i] = NULL; 95 } 96 CFX_MapPtrToPtr element_map; 97 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict("ParentTree"); 98 if (!pParentTree) { 99 return; 100 } 101 CPDF_NumberTree parent_tree(pParentTree); 102 int parents_id = pPageDict->GetInteger("StructParents", -1); 103 if (parents_id >= 0) { 104 CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id)); 105 if (!pParentArray) 106 return; 107 108 for (i = 0; i < pParentArray->GetCount(); i++) { 109 CPDF_Dictionary* pParent = pParentArray->GetDict(i); 110 if (!pParent) { 111 continue; 112 } 113 AddPageNode(pParent, element_map); 114 } 115 } 116} 117CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, 118 CFX_MapPtrToPtr& map, 119 int nLevel) { 120 if (nLevel > nMaxRecursion) { 121 return NULL; 122 } 123 CPDF_StructElementImpl* pElement = NULL; 124 if (map.Lookup(pDict, (void*&)pElement)) { 125 return pElement; 126 } 127 pElement = new CPDF_StructElementImpl(this, NULL, pDict); 128 map.SetAt(pDict, pElement); 129 CPDF_Dictionary* pParent = pDict->GetDict("P"); 130 if (!pParent || pParent->GetString("Type") == "StructTreeRoot") { 131 if (!AddTopLevelNode(pDict, pElement)) { 132 pElement->Release(); 133 map.RemoveKey(pDict); 134 } 135 } else { 136 CPDF_StructElementImpl* pParentElement = 137 AddPageNode(pParent, map, nLevel + 1); 138 FX_BOOL bSave = FALSE; 139 for (int i = 0; i < pParentElement->m_Kids.GetSize(); i++) { 140 if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) { 141 continue; 142 } 143 if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) { 144 continue; 145 } 146 pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain(); 147 bSave = TRUE; 148 } 149 if (!bSave) { 150 pElement->Release(); 151 map.RemoveKey(pDict); 152 } 153 } 154 return pElement; 155} 156FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, 157 CPDF_StructElementImpl* pElement) { 158 CPDF_Object* pObj = m_pTreeRoot->GetElementValue("K"); 159 if (!pObj) { 160 return FALSE; 161 } 162 if (pObj->IsDictionary()) { 163 if (pObj->GetObjNum() == pDict->GetObjNum()) { 164 if (m_Kids[0]) { 165 m_Kids[0]->Release(); 166 } 167 m_Kids[0] = pElement->Retain(); 168 } else { 169 return FALSE; 170 } 171 } 172 if (CPDF_Array* pTopKids = pObj->AsArray()) { 173 FX_DWORD i; 174 FX_BOOL bSave = FALSE; 175 for (i = 0; i < pTopKids->GetCount(); i++) { 176 CPDF_Reference* pKidRef = ToReference(pTopKids->GetElement(i)); 177 if (!pKidRef) 178 continue; 179 if (pKidRef->GetRefObjNum() != pDict->GetObjNum()) 180 continue; 181 182 if (m_Kids[i]) 183 m_Kids[i]->Release(); 184 m_Kids[i] = pElement->Retain(); 185 bSave = TRUE; 186 } 187 if (!bSave) 188 return FALSE; 189 } 190 return TRUE; 191} 192CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, 193 CPDF_StructElementImpl* pParent, 194 CPDF_Dictionary* pDict) 195 : m_RefCount(0) { 196 m_pTree = pTree; 197 m_pDict = pDict; 198 m_Type = pDict->GetString("S"); 199 if (pTree->m_pRoleMap) { 200 CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type); 201 if (!mapped.IsEmpty()) { 202 m_Type = mapped; 203 } 204 } 205 m_pParent = pParent; 206 LoadKids(pDict); 207} 208CPDF_StructElementImpl::~CPDF_StructElementImpl() { 209 for (int i = 0; i < m_Kids.GetSize(); i++) { 210 if (m_Kids[i].m_Type == CPDF_StructKid::Element && 211 m_Kids[i].m_Element.m_pElement) { 212 ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release(); 213 } 214 } 215} 216CPDF_StructElementImpl* CPDF_StructElementImpl::Retain() { 217 m_RefCount++; 218 return this; 219} 220void CPDF_StructElementImpl::Release() { 221 if (--m_RefCount < 1) { 222 delete this; 223 } 224} 225void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict) { 226 CPDF_Object* pObj = pDict->GetElement("Pg"); 227 FX_DWORD PageObjNum = 0; 228 if (CPDF_Reference* pRef = ToReference(pObj)) 229 PageObjNum = pRef->GetRefObjNum(); 230 231 CPDF_Object* pKids = pDict->GetElementValue("K"); 232 if (!pKids) 233 return; 234 235 if (CPDF_Array* pArray = pKids->AsArray()) { 236 m_Kids.SetSize(pArray->GetCount()); 237 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { 238 CPDF_Object* pKid = pArray->GetElementValue(i); 239 LoadKid(PageObjNum, pKid, &m_Kids[i]); 240 } 241 } else { 242 m_Kids.SetSize(1); 243 LoadKid(PageObjNum, pKids, &m_Kids[0]); 244 } 245} 246void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, 247 CPDF_Object* pKidObj, 248 CPDF_StructKid* pKid) { 249 pKid->m_Type = CPDF_StructKid::Invalid; 250 if (!pKidObj) 251 return; 252 253 if (pKidObj->IsNumber()) { 254 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 255 return; 256 } 257 pKid->m_Type = CPDF_StructKid::PageContent; 258 pKid->m_PageContent.m_ContentId = pKidObj->GetInteger(); 259 pKid->m_PageContent.m_PageObjNum = PageObjNum; 260 return; 261 } 262 263 CPDF_Dictionary* pKidDict = pKidObj->AsDictionary(); 264 if (!pKidDict) 265 return; 266 267 if (CPDF_Reference* pRef = ToReference(pKidDict->GetElement("Pg"))) 268 PageObjNum = pRef->GetRefObjNum(); 269 270 CFX_ByteString type = pKidDict->GetString("Type"); 271 if (type == "MCR") { 272 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 273 return; 274 } 275 pKid->m_Type = CPDF_StructKid::StreamContent; 276 if (CPDF_Reference* pRef = ToReference(pKidDict->GetElement("Stm"))) { 277 pKid->m_StreamContent.m_RefObjNum = pRef->GetRefObjNum(); 278 } else { 279 pKid->m_StreamContent.m_RefObjNum = 0; 280 } 281 pKid->m_StreamContent.m_PageObjNum = PageObjNum; 282 pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger("MCID"); 283 } else if (type == "OBJR") { 284 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 285 return; 286 } 287 pKid->m_Type = CPDF_StructKid::Object; 288 if (CPDF_Reference* pObj = ToReference(pKidDict->GetElement("Obj"))) { 289 pKid->m_Object.m_RefObjNum = pObj->GetRefObjNum(); 290 } else { 291 pKid->m_Object.m_RefObjNum = 0; 292 } 293 pKid->m_Object.m_PageObjNum = PageObjNum; 294 } else { 295 pKid->m_Type = CPDF_StructKid::Element; 296 pKid->m_Element.m_pDict = pKidDict; 297 if (!m_pTree->m_pPage) { 298 pKid->m_Element.m_pElement = 299 new CPDF_StructElementImpl(m_pTree, this, pKidDict); 300 } else { 301 pKid->m_Element.m_pElement = NULL; 302 } 303 } 304} 305static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, 306 const CFX_ByteStringC& owner, 307 FX_FLOAT nLevel = 0.0F) { 308 if (nLevel > nMaxRecursion) 309 return nullptr; 310 if (!pAttrs) 311 return nullptr; 312 313 CPDF_Dictionary* pDict = nullptr; 314 if (pAttrs->IsDictionary()) { 315 pDict = pAttrs->AsDictionary(); 316 } else if (CPDF_Stream* pStream = pAttrs->AsStream()) { 317 pDict = pStream->GetDict(); 318 } else if (CPDF_Array* pArray = pAttrs->AsArray()) { 319 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { 320 CPDF_Object* pElement = pArray->GetElementValue(i); 321 pDict = FindAttrDict(pElement, owner, nLevel + 1); 322 if (pDict) 323 return pDict; 324 } 325 } 326 if (pDict && pDict->GetString("O") == owner) 327 return pDict; 328 return nullptr; 329} 330CPDF_Object* CPDF_StructElementImpl::GetAttr(const CFX_ByteStringC& owner, 331 const CFX_ByteStringC& name, 332 FX_BOOL bInheritable, 333 FX_FLOAT fLevel) { 334 if (fLevel > nMaxRecursion) { 335 return NULL; 336 } 337 if (bInheritable) { 338 CPDF_Object* pAttr = GetAttr(owner, name, FALSE); 339 if (pAttr) { 340 return pAttr; 341 } 342 if (!m_pParent) { 343 return NULL; 344 } 345 return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1); 346 } 347 CPDF_Object* pA = m_pDict->GetElementValue("A"); 348 if (pA) { 349 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner); 350 if (pAttrDict) { 351 CPDF_Object* pAttr = pAttrDict->GetElementValue(name); 352 if (pAttr) { 353 return pAttr; 354 } 355 } 356 } 357 CPDF_Object* pC = m_pDict->GetElementValue("C"); 358 if (!pC) 359 return nullptr; 360 361 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict("ClassMap"); 362 if (!pClassMap) 363 return nullptr; 364 365 if (CPDF_Array* pArray = pC->AsArray()) { 366 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { 367 CFX_ByteString class_name = pArray->GetString(i); 368 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name); 369 if (pClassDict && pClassDict->GetString("O") == owner) 370 return pClassDict->GetElementValue(name); 371 } 372 return nullptr; 373 } 374 CFX_ByteString class_name = pC->GetString(); 375 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name); 376 if (pClassDict && pClassDict->GetString("O") == owner) 377 return pClassDict->GetElementValue(name); 378 return nullptr; 379} 380CPDF_Object* CPDF_StructElementImpl::GetAttr(const CFX_ByteStringC& owner, 381 const CFX_ByteStringC& name, 382 FX_BOOL bInheritable, 383 int subindex) { 384 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable); 385 CPDF_Array* pArray = ToArray(pAttr); 386 if (!pArray || subindex == -1) 387 return pAttr; 388 389 if (subindex >= static_cast<int>(pArray->GetCount())) 390 return pAttr; 391 return pArray->GetElementValue(subindex); 392} 393CFX_ByteString CPDF_StructElementImpl::GetName( 394 const CFX_ByteStringC& owner, 395 const CFX_ByteStringC& name, 396 const CFX_ByteStringC& default_value, 397 FX_BOOL bInheritable, 398 int subindex) { 399 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 400 if (ToName(pAttr)) 401 return pAttr->GetString(); 402 return default_value; 403} 404 405FX_ARGB CPDF_StructElementImpl::GetColor(const CFX_ByteStringC& owner, 406 const CFX_ByteStringC& name, 407 FX_ARGB default_value, 408 FX_BOOL bInheritable, 409 int subindex) { 410 CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex)); 411 if (!pArray) 412 return default_value; 413 return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | 414 ((int)(pArray->GetNumber(1) * 255) << 8) | 415 (int)(pArray->GetNumber(2) * 255); 416} 417FX_FLOAT CPDF_StructElementImpl::GetNumber(const CFX_ByteStringC& owner, 418 const CFX_ByteStringC& name, 419 FX_FLOAT default_value, 420 FX_BOOL bInheritable, 421 int subindex) { 422 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 423 return ToNumber(pAttr) ? pAttr->GetNumber() : default_value; 424} 425int CPDF_StructElementImpl::GetInteger(const CFX_ByteStringC& owner, 426 const CFX_ByteStringC& name, 427 int default_value, 428 FX_BOOL bInheritable, 429 int subindex) { 430 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 431 return ToNumber(pAttr) ? pAttr->GetInteger() : default_value; 432} 433