1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#include "core/fpdfapi/font/font_int.h" 8 9#include <memory> 10#include <utility> 11 12#include "core/fpdfapi/cmaps/cmap_int.h" 13#include "core/fpdfapi/cpdf_modulemgr.h" 14#include "core/fpdfapi/font/ttgsubtable.h" 15#include "core/fpdfapi/page/cpdf_pagemodule.h" 16#include "core/fpdfapi/parser/cpdf_array.h" 17#include "core/fpdfapi/parser/cpdf_dictionary.h" 18#include "core/fpdfapi/parser/cpdf_simple_parser.h" 19#include "core/fxcrt/fx_ext.h" 20#include "core/fxge/fx_freetype.h" 21#include "third_party/base/logging.h" 22#include "third_party/base/stl_util.h" 23 24namespace { 25 26const FX_CHAR* const g_CharsetNames[CIDSET_NUM_SETS] = { 27 nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"}; 28 29class CPDF_PredefinedCMap { 30 public: 31 const FX_CHAR* m_pName; 32 CIDSet m_Charset; 33 CIDCoding m_Coding; 34 CPDF_CMap::CodingScheme m_CodingScheme; 35 uint8_t m_LeadingSegCount; 36 uint8_t m_LeadingSegs[4]; 37}; 38 39const CPDF_PredefinedCMap g_PredefinedCMaps[] = { 40 {"GB-EUC", 41 CIDSET_GB1, 42 CIDCODING_GB, 43 CPDF_CMap::MixedTwoBytes, 44 1, 45 {0xa1, 0xfe}}, 46 {"GBpc-EUC", 47 CIDSET_GB1, 48 CIDCODING_GB, 49 CPDF_CMap::MixedTwoBytes, 50 1, 51 {0xa1, 0xfc}}, 52 {"GBK-EUC", 53 CIDSET_GB1, 54 CIDCODING_GB, 55 CPDF_CMap::MixedTwoBytes, 56 1, 57 {0x81, 0xfe}}, 58 {"GBKp-EUC", 59 CIDSET_GB1, 60 CIDCODING_GB, 61 CPDF_CMap::MixedTwoBytes, 62 1, 63 {0x81, 0xfe}}, 64 {"GBK2K-EUC", 65 CIDSET_GB1, 66 CIDCODING_GB, 67 CPDF_CMap::MixedTwoBytes, 68 1, 69 {0x81, 0xfe}}, 70 {"GBK2K", 71 CIDSET_GB1, 72 CIDCODING_GB, 73 CPDF_CMap::MixedTwoBytes, 74 1, 75 {0x81, 0xfe}}, 76 {"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, 77 {"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, 78 {"B5pc", 79 CIDSET_CNS1, 80 CIDCODING_BIG5, 81 CPDF_CMap::MixedTwoBytes, 82 1, 83 {0xa1, 0xfc}}, 84 {"HKscs-B5", 85 CIDSET_CNS1, 86 CIDCODING_BIG5, 87 CPDF_CMap::MixedTwoBytes, 88 1, 89 {0x88, 0xfe}}, 90 {"ETen-B5", 91 CIDSET_CNS1, 92 CIDCODING_BIG5, 93 CPDF_CMap::MixedTwoBytes, 94 1, 95 {0xa1, 0xfe}}, 96 {"ETenms-B5", 97 CIDSET_CNS1, 98 CIDCODING_BIG5, 99 CPDF_CMap::MixedTwoBytes, 100 1, 101 {0xa1, 0xfe}}, 102 {"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, 103 {"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, 104 {"83pv-RKSJ", 105 CIDSET_JAPAN1, 106 CIDCODING_JIS, 107 CPDF_CMap::MixedTwoBytes, 108 2, 109 {0x81, 0x9f, 0xe0, 0xfc}}, 110 {"90ms-RKSJ", 111 CIDSET_JAPAN1, 112 CIDCODING_JIS, 113 CPDF_CMap::MixedTwoBytes, 114 2, 115 {0x81, 0x9f, 0xe0, 0xfc}}, 116 {"90msp-RKSJ", 117 CIDSET_JAPAN1, 118 CIDCODING_JIS, 119 CPDF_CMap::MixedTwoBytes, 120 2, 121 {0x81, 0x9f, 0xe0, 0xfc}}, 122 {"90pv-RKSJ", 123 CIDSET_JAPAN1, 124 CIDCODING_JIS, 125 CPDF_CMap::MixedTwoBytes, 126 2, 127 {0x81, 0x9f, 0xe0, 0xfc}}, 128 {"Add-RKSJ", 129 CIDSET_JAPAN1, 130 CIDCODING_JIS, 131 CPDF_CMap::MixedTwoBytes, 132 2, 133 {0x81, 0x9f, 0xe0, 0xfc}}, 134 {"EUC", 135 CIDSET_JAPAN1, 136 CIDCODING_JIS, 137 CPDF_CMap::MixedTwoBytes, 138 2, 139 {0x8e, 0x8e, 0xa1, 0xfe}}, 140 {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}}, 141 {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}}, 142 {"Ext-RKSJ", 143 CIDSET_JAPAN1, 144 CIDCODING_JIS, 145 CPDF_CMap::MixedTwoBytes, 146 2, 147 {0x81, 0x9f, 0xe0, 0xfc}}, 148 {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, 149 {"UniJIS-UCS2-HW", 150 CIDSET_JAPAN1, 151 CIDCODING_UCS2, 152 CPDF_CMap::TwoBytes, 153 0, 154 {}}, 155 {"UniJIS-UTF16", 156 CIDSET_JAPAN1, 157 CIDCODING_UTF16, 158 CPDF_CMap::TwoBytes, 159 0, 160 {}}, 161 {"KSC-EUC", 162 CIDSET_KOREA1, 163 CIDCODING_KOREA, 164 CPDF_CMap::MixedTwoBytes, 165 1, 166 {0xa1, 0xfe}}, 167 {"KSCms-UHC", 168 CIDSET_KOREA1, 169 CIDCODING_KOREA, 170 CPDF_CMap::MixedTwoBytes, 171 1, 172 {0x81, 0xfe}}, 173 {"KSCms-UHC-HW", 174 CIDSET_KOREA1, 175 CIDCODING_KOREA, 176 CPDF_CMap::MixedTwoBytes, 177 1, 178 {0x81, 0xfe}}, 179 {"KSCpc-EUC", 180 CIDSET_KOREA1, 181 CIDCODING_KOREA, 182 CPDF_CMap::MixedTwoBytes, 183 1, 184 {0xa1, 0xfd}}, 185 {"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, 186 {"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, 187}; 188 189CIDSet CIDSetFromSizeT(size_t index) { 190 if (index >= CIDSET_NUM_SETS) { 191 NOTREACHED(); 192 return CIDSET_UNKNOWN; 193 } 194 return static_cast<CIDSet>(index); 195} 196 197CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) { 198 if (word.GetLength() <= 2) 199 return CFX_ByteStringC(); 200 return CFX_ByteStringC(&word[1], word.GetLength() - 2); 201} 202 203int CompareDWORD(const void* data1, const void* data2) { 204 return (*(uint32_t*)data1) - (*(uint32_t*)data2); 205} 206 207int CompareCID(const void* key, const void* element) { 208 if ((*(uint32_t*)key) < (*(uint32_t*)element)) { 209 return -1; 210 } 211 if ((*(uint32_t*)key) > 212 (*(uint32_t*)element) + ((uint32_t*)element)[1] / 65536) { 213 return 1; 214 } 215 return 0; 216} 217 218int CheckCodeRange(uint8_t* codes, 219 int size, 220 CMap_CodeRange* pRanges, 221 int nRanges) { 222 int iSeg = nRanges - 1; 223 while (iSeg >= 0) { 224 if (pRanges[iSeg].m_CharSize < size) { 225 --iSeg; 226 continue; 227 } 228 int iChar = 0; 229 while (iChar < size) { 230 if (codes[iChar] < pRanges[iSeg].m_Lower[iChar] || 231 codes[iChar] > pRanges[iSeg].m_Upper[iChar]) { 232 break; 233 } 234 ++iChar; 235 } 236 if (iChar == pRanges[iSeg].m_CharSize) 237 return 2; 238 239 if (iChar) 240 return (size == pRanges[iSeg].m_CharSize) ? 2 : 1; 241 iSeg--; 242 } 243 return 0; 244} 245 246int GetCharSizeImpl(uint32_t charcode, 247 CMap_CodeRange* pRanges, 248 int iRangesSize) { 249 if (!iRangesSize) 250 return 1; 251 252 uint8_t codes[4]; 253 codes[0] = codes[1] = 0x00; 254 codes[2] = (uint8_t)(charcode >> 8 & 0xFF); 255 codes[3] = (uint8_t)charcode; 256 int offset = 0; 257 int size = 4; 258 for (int i = 0; i < 4; ++i) { 259 int iSeg = iRangesSize - 1; 260 while (iSeg >= 0) { 261 if (pRanges[iSeg].m_CharSize < size) { 262 --iSeg; 263 continue; 264 } 265 int iChar = 0; 266 while (iChar < size) { 267 if (codes[offset + iChar] < pRanges[iSeg].m_Lower[iChar] || 268 codes[offset + iChar] > pRanges[iSeg].m_Upper[iChar]) { 269 break; 270 } 271 ++iChar; 272 } 273 if (iChar == pRanges[iSeg].m_CharSize) 274 return size; 275 --iSeg; 276 } 277 --size; 278 ++offset; 279 } 280 return 1; 281} 282 283} // namespace 284 285CPDF_CMapManager::CPDF_CMapManager() {} 286 287CPDF_CMapManager::~CPDF_CMapManager() {} 288 289CFX_MaybeOwned<CPDF_CMap> CPDF_CMapManager::GetPredefinedCMap( 290 const CFX_ByteString& name, 291 bool bPromptCJK) { 292 auto it = m_CMaps.find(name); 293 if (it != m_CMaps.end()) 294 return CFX_MaybeOwned<CPDF_CMap>(it->second.get()); // Unowned. 295 296 std::unique_ptr<CPDF_CMap> pCMap = LoadPredefinedCMap(name, bPromptCJK); 297 if (name.IsEmpty()) 298 return CFX_MaybeOwned<CPDF_CMap>(std::move(pCMap)); // Owned. 299 300 CPDF_CMap* pUnowned = pCMap.get(); 301 m_CMaps[name] = std::move(pCMap); 302 return CFX_MaybeOwned<CPDF_CMap>(pUnowned); // Unowned. 303} 304 305std::unique_ptr<CPDF_CMap> CPDF_CMapManager::LoadPredefinedCMap( 306 const CFX_ByteString& name, 307 bool bPromptCJK) { 308 auto pCMap = pdfium::MakeUnique<CPDF_CMap>(); 309 const FX_CHAR* pname = name.c_str(); 310 if (*pname == '/') 311 pname++; 312 313 pCMap->LoadPredefined(this, pname, bPromptCJK); 314 return pCMap; 315} 316 317CPDF_CID2UnicodeMap* CPDF_CMapManager::GetCID2UnicodeMap(CIDSet charset, 318 bool bPromptCJK) { 319 if (!m_CID2UnicodeMaps[charset]) 320 m_CID2UnicodeMaps[charset] = LoadCID2UnicodeMap(charset, bPromptCJK); 321 322 return m_CID2UnicodeMaps[charset].get(); 323} 324 325std::unique_ptr<CPDF_CID2UnicodeMap> CPDF_CMapManager::LoadCID2UnicodeMap( 326 CIDSet charset, 327 bool bPromptCJK) { 328 auto pMap = pdfium::MakeUnique<CPDF_CID2UnicodeMap>(); 329 pMap->Load(this, charset, bPromptCJK); 330 return pMap; 331} 332 333CPDF_CMapParser::CPDF_CMapParser() 334 : m_pCMap(nullptr), m_Status(0), m_CodeSeq(0) {} 335 336CPDF_CMapParser::~CPDF_CMapParser() {} 337 338void CPDF_CMapParser::Initialize(CPDF_CMap* pCMap) { 339 m_pCMap = pCMap; 340 m_Status = 0; 341 m_CodeSeq = 0; 342 m_AddMaps.EstimateSize(0, 10240); 343} 344 345void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) { 346 if (word.IsEmpty()) { 347 return; 348 } 349 if (word == "begincidchar") { 350 m_Status = 1; 351 m_CodeSeq = 0; 352 } else if (word == "begincidrange") { 353 m_Status = 2; 354 m_CodeSeq = 0; 355 } else if (word == "endcidrange" || word == "endcidchar") { 356 m_Status = 0; 357 } else if (word == "/WMode") { 358 m_Status = 6; 359 } else if (word == "/Registry") { 360 m_Status = 3; 361 } else if (word == "/Ordering") { 362 m_Status = 4; 363 } else if (word == "/Supplement") { 364 m_Status = 5; 365 } else if (word == "begincodespacerange") { 366 m_Status = 7; 367 m_CodeSeq = 0; 368 } else if (word == "usecmap") { 369 } else if (m_Status == 1 || m_Status == 2) { 370 m_CodePoints[m_CodeSeq] = CMap_GetCode(word); 371 m_CodeSeq++; 372 uint32_t StartCode, EndCode; 373 uint16_t StartCID; 374 if (m_Status == 1) { 375 if (m_CodeSeq < 2) { 376 return; 377 } 378 EndCode = StartCode = m_CodePoints[0]; 379 StartCID = (uint16_t)m_CodePoints[1]; 380 } else { 381 if (m_CodeSeq < 3) { 382 return; 383 } 384 StartCode = m_CodePoints[0]; 385 EndCode = m_CodePoints[1]; 386 StartCID = (uint16_t)m_CodePoints[2]; 387 } 388 if (EndCode < 0x10000) { 389 for (uint32_t code = StartCode; code <= EndCode; code++) { 390 m_pCMap->m_pMapping[code] = (uint16_t)(StartCID + code - StartCode); 391 } 392 } else { 393 uint32_t buf[2]; 394 buf[0] = StartCode; 395 buf[1] = ((EndCode - StartCode) << 16) + StartCID; 396 m_AddMaps.AppendBlock(buf, sizeof buf); 397 } 398 m_CodeSeq = 0; 399 } else if (m_Status == 3) { 400 m_Status = 0; 401 } else if (m_Status == 4) { 402 m_pCMap->m_Charset = CharsetFromOrdering(CMap_GetString(word)); 403 m_Status = 0; 404 } else if (m_Status == 5) { 405 m_Status = 0; 406 } else if (m_Status == 6) { 407 m_pCMap->m_bVertical = CMap_GetCode(word) != 0; 408 m_Status = 0; 409 } else if (m_Status == 7) { 410 if (word == "endcodespacerange") { 411 uint32_t nSegs = pdfium::CollectionSize<uint32_t>(m_CodeRanges); 412 if (nSegs > 1) { 413 m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes; 414 m_pCMap->m_nCodeRanges = nSegs; 415 FX_Free(m_pCMap->m_pLeadingBytes); 416 m_pCMap->m_pLeadingBytes = 417 FX_Alloc2D(uint8_t, nSegs, sizeof(CMap_CodeRange)); 418 FXSYS_memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(), 419 nSegs * sizeof(CMap_CodeRange)); 420 } else if (nSegs == 1) { 421 m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2) 422 ? CPDF_CMap::TwoBytes 423 : CPDF_CMap::OneByte; 424 } 425 m_Status = 0; 426 } else { 427 if (word.GetLength() == 0 || word.GetAt(0) != '<') { 428 return; 429 } 430 if (m_CodeSeq % 2) { 431 CMap_CodeRange range; 432 if (CMap_GetCodeRange(range, m_LastWord.AsStringC(), word)) 433 m_CodeRanges.push_back(range); 434 } 435 m_CodeSeq++; 436 } 437 } 438 m_LastWord = word; 439} 440 441// Static. 442uint32_t CPDF_CMapParser::CMap_GetCode(const CFX_ByteStringC& word) { 443 pdfium::base::CheckedNumeric<uint32_t> num = 0; 444 if (word.GetAt(0) == '<') { 445 for (int i = 1; i < word.GetLength() && std::isxdigit(word.GetAt(i)); ++i) { 446 num = num * 16 + FXSYS_toHexDigit(word.GetAt(i)); 447 if (!num.IsValid()) 448 return 0; 449 } 450 return num.ValueOrDie(); 451 } 452 453 for (int i = 0; i < word.GetLength() && std::isdigit(word.GetAt(i)); ++i) { 454 num = num * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(word.GetAt(i))); 455 if (!num.IsValid()) 456 return 0; 457 } 458 return num.ValueOrDie(); 459} 460 461// Static. 462bool CPDF_CMapParser::CMap_GetCodeRange(CMap_CodeRange& range, 463 const CFX_ByteStringC& first, 464 const CFX_ByteStringC& second) { 465 if (first.GetLength() == 0 || first.GetAt(0) != '<') 466 return false; 467 468 int i; 469 for (i = 1; i < first.GetLength(); ++i) { 470 if (first.GetAt(i) == '>') { 471 break; 472 } 473 } 474 range.m_CharSize = (i - 1) / 2; 475 if (range.m_CharSize > 4) 476 return false; 477 478 for (i = 0; i < range.m_CharSize; ++i) { 479 uint8_t digit1 = first.GetAt(i * 2 + 1); 480 uint8_t digit2 = first.GetAt(i * 2 + 2); 481 range.m_Lower[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2); 482 } 483 484 uint32_t size = second.GetLength(); 485 for (i = 0; i < range.m_CharSize; ++i) { 486 uint8_t digit1 = ((uint32_t)i * 2 + 1 < size) 487 ? second.GetAt((FX_STRSIZE)i * 2 + 1) 488 : '0'; 489 uint8_t digit2 = ((uint32_t)i * 2 + 2 < size) 490 ? second.GetAt((FX_STRSIZE)i * 2 + 2) 491 : '0'; 492 range.m_Upper[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2); 493 } 494 return true; 495} 496 497CPDF_CMap::CPDF_CMap() { 498 m_Charset = CIDSET_UNKNOWN; 499 m_Coding = CIDCODING_UNKNOWN; 500 m_CodingScheme = TwoBytes; 501 m_bVertical = false; 502 m_bLoaded = false; 503 m_pMapping = nullptr; 504 m_pLeadingBytes = nullptr; 505 m_pAddMapping = nullptr; 506 m_pEmbedMap = nullptr; 507 m_nCodeRanges = 0; 508} 509CPDF_CMap::~CPDF_CMap() { 510 FX_Free(m_pMapping); 511 FX_Free(m_pAddMapping); 512 FX_Free(m_pLeadingBytes); 513} 514 515bool CPDF_CMap::IsLoaded() const { 516 return m_bLoaded; 517} 518 519bool CPDF_CMap::IsVertWriting() const { 520 return m_bVertical; 521} 522 523void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr, 524 const CFX_ByteString& bsName, 525 bool bPromptCJK) { 526 m_PredefinedCMap = bsName; 527 if (m_PredefinedCMap == "Identity-H" || m_PredefinedCMap == "Identity-V") { 528 m_Coding = CIDCODING_CID; 529 m_bVertical = bsName[9] == 'V'; 530 m_bLoaded = true; 531 return; 532 } 533 CFX_ByteString cmapid = m_PredefinedCMap; 534 m_bVertical = cmapid.Right(1) == "V"; 535 if (cmapid.GetLength() > 2) { 536 cmapid = cmapid.Left(cmapid.GetLength() - 2); 537 } 538 const CPDF_PredefinedCMap* map = nullptr; 539 for (size_t i = 0; i < FX_ArraySize(g_PredefinedCMaps); ++i) { 540 if (cmapid == CFX_ByteStringC(g_PredefinedCMaps[i].m_pName)) { 541 map = &g_PredefinedCMaps[i]; 542 break; 543 } 544 } 545 if (!map) 546 return; 547 548 m_Charset = map->m_Charset; 549 m_Coding = map->m_Coding; 550 m_CodingScheme = map->m_CodingScheme; 551 if (m_CodingScheme == MixedTwoBytes) { 552 m_pLeadingBytes = FX_Alloc(uint8_t, 256); 553 for (uint32_t i = 0; i < map->m_LeadingSegCount; ++i) { 554 const uint8_t* segs = map->m_LeadingSegs; 555 for (int b = segs[i * 2]; b <= segs[i * 2 + 1]; ++b) { 556 m_pLeadingBytes[b] = 1; 557 } 558 } 559 } 560 FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap); 561 if (!m_pEmbedMap) 562 return; 563 564 m_bLoaded = true; 565} 566 567void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) { 568 m_pMapping = FX_Alloc(uint16_t, 65536); 569 CPDF_CMapParser parser; 570 parser.Initialize(this); 571 CPDF_SimpleParser syntax(pData, size); 572 while (1) { 573 CFX_ByteStringC word = syntax.GetWord(); 574 if (word.IsEmpty()) { 575 break; 576 } 577 parser.ParseWord(word); 578 } 579 if (m_CodingScheme == MixedFourBytes && parser.m_AddMaps.GetSize()) { 580 m_pAddMapping = FX_Alloc(uint8_t, parser.m_AddMaps.GetSize() + 4); 581 *(uint32_t*)m_pAddMapping = parser.m_AddMaps.GetSize() / 8; 582 FXSYS_memcpy(m_pAddMapping + 4, parser.m_AddMaps.GetBuffer(), 583 parser.m_AddMaps.GetSize()); 584 FXSYS_qsort(m_pAddMapping + 4, parser.m_AddMaps.GetSize() / 8, 8, 585 CompareDWORD); 586 } 587} 588 589uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const { 590 if (m_Coding == CIDCODING_CID) { 591 return (uint16_t)charcode; 592 } 593 if (m_pEmbedMap) { 594 return FPDFAPI_CIDFromCharCode(m_pEmbedMap, charcode); 595 } 596 if (!m_pMapping) { 597 return (uint16_t)charcode; 598 } 599 if (charcode >> 16) { 600 if (m_pAddMapping) { 601 void* found = FXSYS_bsearch(&charcode, m_pAddMapping + 4, 602 *(uint32_t*)m_pAddMapping, 8, CompareCID); 603 if (!found) 604 return 0; 605 return (uint16_t)(((uint32_t*)found)[1] % 65536 + charcode - 606 *(uint32_t*)found); 607 } 608 return 0; 609 } 610 return m_pMapping[charcode]; 611} 612 613uint32_t CPDF_CMap::GetNextChar(const FX_CHAR* pString, 614 int nStrLen, 615 int& offset) const { 616 switch (m_CodingScheme) { 617 case OneByte: 618 return ((uint8_t*)pString)[offset++]; 619 case TwoBytes: 620 offset += 2; 621 return ((uint8_t*)pString)[offset - 2] * 256 + 622 ((uint8_t*)pString)[offset - 1]; 623 case MixedTwoBytes: { 624 uint8_t byte1 = ((uint8_t*)pString)[offset++]; 625 if (!m_pLeadingBytes[byte1]) { 626 return byte1; 627 } 628 uint8_t byte2 = ((uint8_t*)pString)[offset++]; 629 return byte1 * 256 + byte2; 630 } 631 case MixedFourBytes: { 632 uint8_t codes[4]; 633 int char_size = 1; 634 codes[0] = ((uint8_t*)pString)[offset++]; 635 CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes; 636 while (1) { 637 int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges); 638 if (ret == 0) { 639 return 0; 640 } 641 if (ret == 2) { 642 uint32_t charcode = 0; 643 for (int i = 0; i < char_size; i++) { 644 charcode = (charcode << 8) + codes[i]; 645 } 646 return charcode; 647 } 648 if (char_size == 4 || offset == nStrLen) { 649 return 0; 650 } 651 codes[char_size++] = ((uint8_t*)pString)[offset++]; 652 } 653 break; 654 } 655 } 656 return 0; 657} 658int CPDF_CMap::GetCharSize(uint32_t charcode) const { 659 switch (m_CodingScheme) { 660 case OneByte: 661 return 1; 662 case TwoBytes: 663 return 2; 664 case MixedTwoBytes: 665 case MixedFourBytes: 666 if (charcode < 0x100) { 667 return 1; 668 } 669 if (charcode < 0x10000) { 670 return 2; 671 } 672 if (charcode < 0x1000000) { 673 return 3; 674 } 675 return 4; 676 } 677 return 1; 678} 679int CPDF_CMap::CountChar(const FX_CHAR* pString, int size) const { 680 switch (m_CodingScheme) { 681 case OneByte: 682 return size; 683 case TwoBytes: 684 return (size + 1) / 2; 685 case MixedTwoBytes: { 686 int count = 0; 687 for (int i = 0; i < size; i++) { 688 count++; 689 if (m_pLeadingBytes[((uint8_t*)pString)[i]]) { 690 i++; 691 } 692 } 693 return count; 694 } 695 case MixedFourBytes: { 696 int count = 0, offset = 0; 697 while (offset < size) { 698 GetNextChar(pString, size, offset); 699 count++; 700 } 701 return count; 702 } 703 } 704 return size; 705} 706 707int CPDF_CMap::AppendChar(FX_CHAR* str, uint32_t charcode) const { 708 switch (m_CodingScheme) { 709 case OneByte: 710 str[0] = (uint8_t)charcode; 711 return 1; 712 case TwoBytes: 713 str[0] = (uint8_t)(charcode / 256); 714 str[1] = (uint8_t)(charcode % 256); 715 return 2; 716 case MixedTwoBytes: 717 case MixedFourBytes: 718 if (charcode < 0x100) { 719 CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes; 720 int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges); 721 if (iSize == 0) { 722 iSize = 1; 723 } 724 if (iSize > 1) { 725 FXSYS_memset(str, 0, sizeof(uint8_t) * iSize); 726 } 727 str[iSize - 1] = (uint8_t)charcode; 728 return iSize; 729 } 730 if (charcode < 0x10000) { 731 str[0] = (uint8_t)(charcode >> 8); 732 str[1] = (uint8_t)charcode; 733 return 2; 734 } 735 if (charcode < 0x1000000) { 736 str[0] = (uint8_t)(charcode >> 16); 737 str[1] = (uint8_t)(charcode >> 8); 738 str[2] = (uint8_t)charcode; 739 return 3; 740 } 741 str[0] = (uint8_t)(charcode >> 24); 742 str[1] = (uint8_t)(charcode >> 16); 743 str[2] = (uint8_t)(charcode >> 8); 744 str[3] = (uint8_t)charcode; 745 return 4; 746 } 747 return 0; 748} 749 750CPDF_CID2UnicodeMap::CPDF_CID2UnicodeMap() { 751 m_EmbeddedCount = 0; 752} 753 754CPDF_CID2UnicodeMap::~CPDF_CID2UnicodeMap() {} 755 756bool CPDF_CID2UnicodeMap::IsLoaded() { 757 return m_EmbeddedCount != 0; 758} 759 760FX_WCHAR CPDF_CID2UnicodeMap::UnicodeFromCID(uint16_t CID) { 761 if (m_Charset == CIDSET_UNICODE) { 762 return CID; 763 } 764 if (CID < m_EmbeddedCount) { 765 return m_pEmbeddedMap[CID]; 766 } 767 return 0; 768} 769 770void CPDF_CID2UnicodeMap::Load(CPDF_CMapManager* pMgr, 771 CIDSet charset, 772 bool bPromptCJK) { 773 m_Charset = charset; 774 775 CPDF_FontGlobals* pFontGlobals = 776 CPDF_ModuleMgr::Get()->GetPageModule()->GetFontGlobals(); 777 m_pEmbeddedMap = pFontGlobals->m_EmbeddedToUnicodes[charset].m_pMap; 778 m_EmbeddedCount = pFontGlobals->m_EmbeddedToUnicodes[charset].m_Count; 779} 780 781CIDSet CharsetFromOrdering(const CFX_ByteStringC& ordering) { 782 for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) { 783 if (ordering == g_CharsetNames[charset]) 784 return CIDSetFromSizeT(charset); 785 } 786 return CIDSET_UNKNOWN; 787} 788