1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/font/font_int.h"
8
9#include <memory>
10#include <utility>
11
12#include "core/fpdfapi/cmaps/cmap_int.h"
13#include "core/fpdfapi/cpdf_modulemgr.h"
14#include "core/fpdfapi/font/ttgsubtable.h"
15#include "core/fpdfapi/page/cpdf_pagemodule.h"
16#include "core/fpdfapi/parser/cpdf_array.h"
17#include "core/fpdfapi/parser/cpdf_dictionary.h"
18#include "core/fpdfapi/parser/cpdf_simple_parser.h"
19#include "core/fxcrt/fx_ext.h"
20#include "core/fxge/fx_freetype.h"
21#include "third_party/base/logging.h"
22#include "third_party/base/stl_util.h"
23
24namespace {
25
26const FX_CHAR* const g_CharsetNames[CIDSET_NUM_SETS] = {
27    nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"};
28
29class CPDF_PredefinedCMap {
30 public:
31  const FX_CHAR* m_pName;
32  CIDSet m_Charset;
33  CIDCoding m_Coding;
34  CPDF_CMap::CodingScheme m_CodingScheme;
35  uint8_t m_LeadingSegCount;
36  uint8_t m_LeadingSegs[4];
37};
38
39const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
40    {"GB-EUC",
41     CIDSET_GB1,
42     CIDCODING_GB,
43     CPDF_CMap::MixedTwoBytes,
44     1,
45     {0xa1, 0xfe}},
46    {"GBpc-EUC",
47     CIDSET_GB1,
48     CIDCODING_GB,
49     CPDF_CMap::MixedTwoBytes,
50     1,
51     {0xa1, 0xfc}},
52    {"GBK-EUC",
53     CIDSET_GB1,
54     CIDCODING_GB,
55     CPDF_CMap::MixedTwoBytes,
56     1,
57     {0x81, 0xfe}},
58    {"GBKp-EUC",
59     CIDSET_GB1,
60     CIDCODING_GB,
61     CPDF_CMap::MixedTwoBytes,
62     1,
63     {0x81, 0xfe}},
64    {"GBK2K-EUC",
65     CIDSET_GB1,
66     CIDCODING_GB,
67     CPDF_CMap::MixedTwoBytes,
68     1,
69     {0x81, 0xfe}},
70    {"GBK2K",
71     CIDSET_GB1,
72     CIDCODING_GB,
73     CPDF_CMap::MixedTwoBytes,
74     1,
75     {0x81, 0xfe}},
76    {"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
77    {"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
78    {"B5pc",
79     CIDSET_CNS1,
80     CIDCODING_BIG5,
81     CPDF_CMap::MixedTwoBytes,
82     1,
83     {0xa1, 0xfc}},
84    {"HKscs-B5",
85     CIDSET_CNS1,
86     CIDCODING_BIG5,
87     CPDF_CMap::MixedTwoBytes,
88     1,
89     {0x88, 0xfe}},
90    {"ETen-B5",
91     CIDSET_CNS1,
92     CIDCODING_BIG5,
93     CPDF_CMap::MixedTwoBytes,
94     1,
95     {0xa1, 0xfe}},
96    {"ETenms-B5",
97     CIDSET_CNS1,
98     CIDCODING_BIG5,
99     CPDF_CMap::MixedTwoBytes,
100     1,
101     {0xa1, 0xfe}},
102    {"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
103    {"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
104    {"83pv-RKSJ",
105     CIDSET_JAPAN1,
106     CIDCODING_JIS,
107     CPDF_CMap::MixedTwoBytes,
108     2,
109     {0x81, 0x9f, 0xe0, 0xfc}},
110    {"90ms-RKSJ",
111     CIDSET_JAPAN1,
112     CIDCODING_JIS,
113     CPDF_CMap::MixedTwoBytes,
114     2,
115     {0x81, 0x9f, 0xe0, 0xfc}},
116    {"90msp-RKSJ",
117     CIDSET_JAPAN1,
118     CIDCODING_JIS,
119     CPDF_CMap::MixedTwoBytes,
120     2,
121     {0x81, 0x9f, 0xe0, 0xfc}},
122    {"90pv-RKSJ",
123     CIDSET_JAPAN1,
124     CIDCODING_JIS,
125     CPDF_CMap::MixedTwoBytes,
126     2,
127     {0x81, 0x9f, 0xe0, 0xfc}},
128    {"Add-RKSJ",
129     CIDSET_JAPAN1,
130     CIDCODING_JIS,
131     CPDF_CMap::MixedTwoBytes,
132     2,
133     {0x81, 0x9f, 0xe0, 0xfc}},
134    {"EUC",
135     CIDSET_JAPAN1,
136     CIDCODING_JIS,
137     CPDF_CMap::MixedTwoBytes,
138     2,
139     {0x8e, 0x8e, 0xa1, 0xfe}},
140    {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
141    {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
142    {"Ext-RKSJ",
143     CIDSET_JAPAN1,
144     CIDCODING_JIS,
145     CPDF_CMap::MixedTwoBytes,
146     2,
147     {0x81, 0x9f, 0xe0, 0xfc}},
148    {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
149    {"UniJIS-UCS2-HW",
150     CIDSET_JAPAN1,
151     CIDCODING_UCS2,
152     CPDF_CMap::TwoBytes,
153     0,
154     {}},
155    {"UniJIS-UTF16",
156     CIDSET_JAPAN1,
157     CIDCODING_UTF16,
158     CPDF_CMap::TwoBytes,
159     0,
160     {}},
161    {"KSC-EUC",
162     CIDSET_KOREA1,
163     CIDCODING_KOREA,
164     CPDF_CMap::MixedTwoBytes,
165     1,
166     {0xa1, 0xfe}},
167    {"KSCms-UHC",
168     CIDSET_KOREA1,
169     CIDCODING_KOREA,
170     CPDF_CMap::MixedTwoBytes,
171     1,
172     {0x81, 0xfe}},
173    {"KSCms-UHC-HW",
174     CIDSET_KOREA1,
175     CIDCODING_KOREA,
176     CPDF_CMap::MixedTwoBytes,
177     1,
178     {0x81, 0xfe}},
179    {"KSCpc-EUC",
180     CIDSET_KOREA1,
181     CIDCODING_KOREA,
182     CPDF_CMap::MixedTwoBytes,
183     1,
184     {0xa1, 0xfd}},
185    {"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
186    {"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
187};
188
189CIDSet CIDSetFromSizeT(size_t index) {
190  if (index >= CIDSET_NUM_SETS) {
191    NOTREACHED();
192    return CIDSET_UNKNOWN;
193  }
194  return static_cast<CIDSet>(index);
195}
196
197CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) {
198  if (word.GetLength() <= 2)
199    return CFX_ByteStringC();
200  return CFX_ByteStringC(&word[1], word.GetLength() - 2);
201}
202
// qsort()-style comparator ordering records by their leading uint32_t.
// Returns a negative, zero or positive value when the first key is less
// than, equal to or greater than the second.
//
// The keys are compared explicitly instead of being subtracted: an unsigned
// difference converted to int has the wrong sign whenever the two keys are
// more than 2^31 apart, which would corrupt the sort order.
int CompareDWORD(const void* data1, const void* data2) {
  const uint32_t lhs = *static_cast<const uint32_t*>(data1);
  const uint32_t rhs = *static_cast<const uint32_t*>(data2);
  if (lhs < rhs)
    return -1;
  return lhs > rhs ? 1 : 0;
}
206
// bsearch()-style comparator locating the range record that contains a
// character code.
//
// |key| points at the uint32_t character code. |element| points at a record
// of two uint32_t values: the first code of the range, then a word whose
// upper 16 bits hold the range length (last code - first code).
//
// Returns -1 if the code lies before the range, 1 if it lies after it and 0
// if it falls inside. The upper bound is tested as "code - first > length"
// rather than "code > first + length" so that the sum cannot wrap around
// for ranges near the top of the 32-bit code space.
int CompareCID(const void* key, const void* element) {
  const uint32_t code = *static_cast<const uint32_t*>(key);
  const uint32_t* entry = static_cast<const uint32_t*>(element);
  const uint32_t first = entry[0];
  if (code < first)
    return -1;

  const uint32_t length = entry[1] / 65536;
  // |code| >= |first| here, so the subtraction cannot underflow.
  return (code - first > length) ? 1 : 0;
}
217
218int CheckCodeRange(uint8_t* codes,
219                   int size,
220                   CMap_CodeRange* pRanges,
221                   int nRanges) {
222  int iSeg = nRanges - 1;
223  while (iSeg >= 0) {
224    if (pRanges[iSeg].m_CharSize < size) {
225      --iSeg;
226      continue;
227    }
228    int iChar = 0;
229    while (iChar < size) {
230      if (codes[iChar] < pRanges[iSeg].m_Lower[iChar] ||
231          codes[iChar] > pRanges[iSeg].m_Upper[iChar]) {
232        break;
233      }
234      ++iChar;
235    }
236    if (iChar == pRanges[iSeg].m_CharSize)
237      return 2;
238
239    if (iChar)
240      return (size == pRanges[iSeg].m_CharSize) ? 2 : 1;
241    iSeg--;
242  }
243  return 0;
244}
245
246int GetCharSizeImpl(uint32_t charcode,
247                    CMap_CodeRange* pRanges,
248                    int iRangesSize) {
249  if (!iRangesSize)
250    return 1;
251
252  uint8_t codes[4];
253  codes[0] = codes[1] = 0x00;
254  codes[2] = (uint8_t)(charcode >> 8 & 0xFF);
255  codes[3] = (uint8_t)charcode;
256  int offset = 0;
257  int size = 4;
258  for (int i = 0; i < 4; ++i) {
259    int iSeg = iRangesSize - 1;
260    while (iSeg >= 0) {
261      if (pRanges[iSeg].m_CharSize < size) {
262        --iSeg;
263        continue;
264      }
265      int iChar = 0;
266      while (iChar < size) {
267        if (codes[offset + iChar] < pRanges[iSeg].m_Lower[iChar] ||
268            codes[offset + iChar] > pRanges[iSeg].m_Upper[iChar]) {
269          break;
270        }
271        ++iChar;
272      }
273      if (iChar == pRanges[iSeg].m_CharSize)
274        return size;
275      --iSeg;
276    }
277    --size;
278    ++offset;
279  }
280  return 1;
281}
282
283}  // namespace
284
285CPDF_CMapManager::CPDF_CMapManager() {}
286
287CPDF_CMapManager::~CPDF_CMapManager() {}
288
289CFX_MaybeOwned<CPDF_CMap> CPDF_CMapManager::GetPredefinedCMap(
290    const CFX_ByteString& name,
291    bool bPromptCJK) {
292  auto it = m_CMaps.find(name);
293  if (it != m_CMaps.end())
294    return CFX_MaybeOwned<CPDF_CMap>(it->second.get());  // Unowned.
295
296  std::unique_ptr<CPDF_CMap> pCMap = LoadPredefinedCMap(name, bPromptCJK);
297  if (name.IsEmpty())
298    return CFX_MaybeOwned<CPDF_CMap>(std::move(pCMap));  // Owned.
299
300  CPDF_CMap* pUnowned = pCMap.get();
301  m_CMaps[name] = std::move(pCMap);
302  return CFX_MaybeOwned<CPDF_CMap>(pUnowned);  // Unowned.
303}
304
305std::unique_ptr<CPDF_CMap> CPDF_CMapManager::LoadPredefinedCMap(
306    const CFX_ByteString& name,
307    bool bPromptCJK) {
308  auto pCMap = pdfium::MakeUnique<CPDF_CMap>();
309  const FX_CHAR* pname = name.c_str();
310  if (*pname == '/')
311    pname++;
312
313  pCMap->LoadPredefined(this, pname, bPromptCJK);
314  return pCMap;
315}
316
317CPDF_CID2UnicodeMap* CPDF_CMapManager::GetCID2UnicodeMap(CIDSet charset,
318                                                         bool bPromptCJK) {
319  if (!m_CID2UnicodeMaps[charset])
320    m_CID2UnicodeMaps[charset] = LoadCID2UnicodeMap(charset, bPromptCJK);
321
322  return m_CID2UnicodeMaps[charset].get();
323}
324
325std::unique_ptr<CPDF_CID2UnicodeMap> CPDF_CMapManager::LoadCID2UnicodeMap(
326    CIDSet charset,
327    bool bPromptCJK) {
328  auto pMap = pdfium::MakeUnique<CPDF_CID2UnicodeMap>();
329  pMap->Load(this, charset, bPromptCJK);
330  return pMap;
331}
332
333CPDF_CMapParser::CPDF_CMapParser()
334    : m_pCMap(nullptr), m_Status(0), m_CodeSeq(0) {}
335
336CPDF_CMapParser::~CPDF_CMapParser() {}
337
338void CPDF_CMapParser::Initialize(CPDF_CMap* pCMap) {
339  m_pCMap = pCMap;
340  m_Status = 0;
341  m_CodeSeq = 0;
342  m_AddMaps.EstimateSize(0, 10240);
343}
344
345void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) {
346  if (word.IsEmpty()) {
347    return;
348  }
349  if (word == "begincidchar") {
350    m_Status = 1;
351    m_CodeSeq = 0;
352  } else if (word == "begincidrange") {
353    m_Status = 2;
354    m_CodeSeq = 0;
355  } else if (word == "endcidrange" || word == "endcidchar") {
356    m_Status = 0;
357  } else if (word == "/WMode") {
358    m_Status = 6;
359  } else if (word == "/Registry") {
360    m_Status = 3;
361  } else if (word == "/Ordering") {
362    m_Status = 4;
363  } else if (word == "/Supplement") {
364    m_Status = 5;
365  } else if (word == "begincodespacerange") {
366    m_Status = 7;
367    m_CodeSeq = 0;
368  } else if (word == "usecmap") {
369  } else if (m_Status == 1 || m_Status == 2) {
370    m_CodePoints[m_CodeSeq] = CMap_GetCode(word);
371    m_CodeSeq++;
372    uint32_t StartCode, EndCode;
373    uint16_t StartCID;
374    if (m_Status == 1) {
375      if (m_CodeSeq < 2) {
376        return;
377      }
378      EndCode = StartCode = m_CodePoints[0];
379      StartCID = (uint16_t)m_CodePoints[1];
380    } else {
381      if (m_CodeSeq < 3) {
382        return;
383      }
384      StartCode = m_CodePoints[0];
385      EndCode = m_CodePoints[1];
386      StartCID = (uint16_t)m_CodePoints[2];
387    }
388    if (EndCode < 0x10000) {
389      for (uint32_t code = StartCode; code <= EndCode; code++) {
390        m_pCMap->m_pMapping[code] = (uint16_t)(StartCID + code - StartCode);
391      }
392    } else {
393      uint32_t buf[2];
394      buf[0] = StartCode;
395      buf[1] = ((EndCode - StartCode) << 16) + StartCID;
396      m_AddMaps.AppendBlock(buf, sizeof buf);
397    }
398    m_CodeSeq = 0;
399  } else if (m_Status == 3) {
400    m_Status = 0;
401  } else if (m_Status == 4) {
402    m_pCMap->m_Charset = CharsetFromOrdering(CMap_GetString(word));
403    m_Status = 0;
404  } else if (m_Status == 5) {
405    m_Status = 0;
406  } else if (m_Status == 6) {
407    m_pCMap->m_bVertical = CMap_GetCode(word) != 0;
408    m_Status = 0;
409  } else if (m_Status == 7) {
410    if (word == "endcodespacerange") {
411      uint32_t nSegs = pdfium::CollectionSize<uint32_t>(m_CodeRanges);
412      if (nSegs > 1) {
413        m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes;
414        m_pCMap->m_nCodeRanges = nSegs;
415        FX_Free(m_pCMap->m_pLeadingBytes);
416        m_pCMap->m_pLeadingBytes =
417            FX_Alloc2D(uint8_t, nSegs, sizeof(CMap_CodeRange));
418        FXSYS_memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(),
419                     nSegs * sizeof(CMap_CodeRange));
420      } else if (nSegs == 1) {
421        m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2)
422                                      ? CPDF_CMap::TwoBytes
423                                      : CPDF_CMap::OneByte;
424      }
425      m_Status = 0;
426    } else {
427      if (word.GetLength() == 0 || word.GetAt(0) != '<') {
428        return;
429      }
430      if (m_CodeSeq % 2) {
431        CMap_CodeRange range;
432        if (CMap_GetCodeRange(range, m_LastWord.AsStringC(), word))
433          m_CodeRanges.push_back(range);
434      }
435      m_CodeSeq++;
436    }
437  }
438  m_LastWord = word;
439}
440
441// Static.
442uint32_t CPDF_CMapParser::CMap_GetCode(const CFX_ByteStringC& word) {
443  pdfium::base::CheckedNumeric<uint32_t> num = 0;
444  if (word.GetAt(0) == '<') {
445    for (int i = 1; i < word.GetLength() && std::isxdigit(word.GetAt(i)); ++i) {
446      num = num * 16 + FXSYS_toHexDigit(word.GetAt(i));
447      if (!num.IsValid())
448        return 0;
449    }
450    return num.ValueOrDie();
451  }
452
453  for (int i = 0; i < word.GetLength() && std::isdigit(word.GetAt(i)); ++i) {
454    num = num * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(word.GetAt(i)));
455    if (!num.IsValid())
456      return 0;
457  }
458  return num.ValueOrDie();
459}
460
461// Static.
462bool CPDF_CMapParser::CMap_GetCodeRange(CMap_CodeRange& range,
463                                        const CFX_ByteStringC& first,
464                                        const CFX_ByteStringC& second) {
465  if (first.GetLength() == 0 || first.GetAt(0) != '<')
466    return false;
467
468  int i;
469  for (i = 1; i < first.GetLength(); ++i) {
470    if (first.GetAt(i) == '>') {
471      break;
472    }
473  }
474  range.m_CharSize = (i - 1) / 2;
475  if (range.m_CharSize > 4)
476    return false;
477
478  for (i = 0; i < range.m_CharSize; ++i) {
479    uint8_t digit1 = first.GetAt(i * 2 + 1);
480    uint8_t digit2 = first.GetAt(i * 2 + 2);
481    range.m_Lower[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2);
482  }
483
484  uint32_t size = second.GetLength();
485  for (i = 0; i < range.m_CharSize; ++i) {
486    uint8_t digit1 = ((uint32_t)i * 2 + 1 < size)
487                         ? second.GetAt((FX_STRSIZE)i * 2 + 1)
488                         : '0';
489    uint8_t digit2 = ((uint32_t)i * 2 + 2 < size)
490                         ? second.GetAt((FX_STRSIZE)i * 2 + 2)
491                         : '0';
492    range.m_Upper[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2);
493  }
494  return true;
495}
496
497CPDF_CMap::CPDF_CMap() {
498  m_Charset = CIDSET_UNKNOWN;
499  m_Coding = CIDCODING_UNKNOWN;
500  m_CodingScheme = TwoBytes;
501  m_bVertical = false;
502  m_bLoaded = false;
503  m_pMapping = nullptr;
504  m_pLeadingBytes = nullptr;
505  m_pAddMapping = nullptr;
506  m_pEmbedMap = nullptr;
507  m_nCodeRanges = 0;
508}
509CPDF_CMap::~CPDF_CMap() {
510  FX_Free(m_pMapping);
511  FX_Free(m_pAddMapping);
512  FX_Free(m_pLeadingBytes);
513}
514
515bool CPDF_CMap::IsLoaded() const {
516  return m_bLoaded;
517}
518
519bool CPDF_CMap::IsVertWriting() const {
520  return m_bVertical;
521}
522
523void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
524                               const CFX_ByteString& bsName,
525                               bool bPromptCJK) {
526  m_PredefinedCMap = bsName;
527  if (m_PredefinedCMap == "Identity-H" || m_PredefinedCMap == "Identity-V") {
528    m_Coding = CIDCODING_CID;
529    m_bVertical = bsName[9] == 'V';
530    m_bLoaded = true;
531    return;
532  }
533  CFX_ByteString cmapid = m_PredefinedCMap;
534  m_bVertical = cmapid.Right(1) == "V";
535  if (cmapid.GetLength() > 2) {
536    cmapid = cmapid.Left(cmapid.GetLength() - 2);
537  }
538  const CPDF_PredefinedCMap* map = nullptr;
539  for (size_t i = 0; i < FX_ArraySize(g_PredefinedCMaps); ++i) {
540    if (cmapid == CFX_ByteStringC(g_PredefinedCMaps[i].m_pName)) {
541      map = &g_PredefinedCMaps[i];
542      break;
543    }
544  }
545  if (!map)
546    return;
547
548  m_Charset = map->m_Charset;
549  m_Coding = map->m_Coding;
550  m_CodingScheme = map->m_CodingScheme;
551  if (m_CodingScheme == MixedTwoBytes) {
552    m_pLeadingBytes = FX_Alloc(uint8_t, 256);
553    for (uint32_t i = 0; i < map->m_LeadingSegCount; ++i) {
554      const uint8_t* segs = map->m_LeadingSegs;
555      for (int b = segs[i * 2]; b <= segs[i * 2 + 1]; ++b) {
556        m_pLeadingBytes[b] = 1;
557      }
558    }
559  }
560  FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap);
561  if (!m_pEmbedMap)
562    return;
563
564  m_bLoaded = true;
565}
566
567void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) {
568  m_pMapping = FX_Alloc(uint16_t, 65536);
569  CPDF_CMapParser parser;
570  parser.Initialize(this);
571  CPDF_SimpleParser syntax(pData, size);
572  while (1) {
573    CFX_ByteStringC word = syntax.GetWord();
574    if (word.IsEmpty()) {
575      break;
576    }
577    parser.ParseWord(word);
578  }
579  if (m_CodingScheme == MixedFourBytes && parser.m_AddMaps.GetSize()) {
580    m_pAddMapping = FX_Alloc(uint8_t, parser.m_AddMaps.GetSize() + 4);
581    *(uint32_t*)m_pAddMapping = parser.m_AddMaps.GetSize() / 8;
582    FXSYS_memcpy(m_pAddMapping + 4, parser.m_AddMaps.GetBuffer(),
583                 parser.m_AddMaps.GetSize());
584    FXSYS_qsort(m_pAddMapping + 4, parser.m_AddMaps.GetSize() / 8, 8,
585                CompareDWORD);
586  }
587}
588
589uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const {
590  if (m_Coding == CIDCODING_CID) {
591    return (uint16_t)charcode;
592  }
593  if (m_pEmbedMap) {
594    return FPDFAPI_CIDFromCharCode(m_pEmbedMap, charcode);
595  }
596  if (!m_pMapping) {
597    return (uint16_t)charcode;
598  }
599  if (charcode >> 16) {
600    if (m_pAddMapping) {
601      void* found = FXSYS_bsearch(&charcode, m_pAddMapping + 4,
602                                  *(uint32_t*)m_pAddMapping, 8, CompareCID);
603      if (!found)
604        return 0;
605      return (uint16_t)(((uint32_t*)found)[1] % 65536 + charcode -
606                        *(uint32_t*)found);
607    }
608    return 0;
609  }
610  return m_pMapping[charcode];
611}
612
613uint32_t CPDF_CMap::GetNextChar(const FX_CHAR* pString,
614                                int nStrLen,
615                                int& offset) const {
616  switch (m_CodingScheme) {
617    case OneByte:
618      return ((uint8_t*)pString)[offset++];
619    case TwoBytes:
620      offset += 2;
621      return ((uint8_t*)pString)[offset - 2] * 256 +
622             ((uint8_t*)pString)[offset - 1];
623    case MixedTwoBytes: {
624      uint8_t byte1 = ((uint8_t*)pString)[offset++];
625      if (!m_pLeadingBytes[byte1]) {
626        return byte1;
627      }
628      uint8_t byte2 = ((uint8_t*)pString)[offset++];
629      return byte1 * 256 + byte2;
630    }
631    case MixedFourBytes: {
632      uint8_t codes[4];
633      int char_size = 1;
634      codes[0] = ((uint8_t*)pString)[offset++];
635      CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes;
636      while (1) {
637        int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges);
638        if (ret == 0) {
639          return 0;
640        }
641        if (ret == 2) {
642          uint32_t charcode = 0;
643          for (int i = 0; i < char_size; i++) {
644            charcode = (charcode << 8) + codes[i];
645          }
646          return charcode;
647        }
648        if (char_size == 4 || offset == nStrLen) {
649          return 0;
650        }
651        codes[char_size++] = ((uint8_t*)pString)[offset++];
652      }
653      break;
654    }
655  }
656  return 0;
657}
658int CPDF_CMap::GetCharSize(uint32_t charcode) const {
659  switch (m_CodingScheme) {
660    case OneByte:
661      return 1;
662    case TwoBytes:
663      return 2;
664    case MixedTwoBytes:
665    case MixedFourBytes:
666      if (charcode < 0x100) {
667        return 1;
668      }
669      if (charcode < 0x10000) {
670        return 2;
671      }
672      if (charcode < 0x1000000) {
673        return 3;
674      }
675      return 4;
676  }
677  return 1;
678}
679int CPDF_CMap::CountChar(const FX_CHAR* pString, int size) const {
680  switch (m_CodingScheme) {
681    case OneByte:
682      return size;
683    case TwoBytes:
684      return (size + 1) / 2;
685    case MixedTwoBytes: {
686      int count = 0;
687      for (int i = 0; i < size; i++) {
688        count++;
689        if (m_pLeadingBytes[((uint8_t*)pString)[i]]) {
690          i++;
691        }
692      }
693      return count;
694    }
695    case MixedFourBytes: {
696      int count = 0, offset = 0;
697      while (offset < size) {
698        GetNextChar(pString, size, offset);
699        count++;
700      }
701      return count;
702    }
703  }
704  return size;
705}
706
707int CPDF_CMap::AppendChar(FX_CHAR* str, uint32_t charcode) const {
708  switch (m_CodingScheme) {
709    case OneByte:
710      str[0] = (uint8_t)charcode;
711      return 1;
712    case TwoBytes:
713      str[0] = (uint8_t)(charcode / 256);
714      str[1] = (uint8_t)(charcode % 256);
715      return 2;
716    case MixedTwoBytes:
717    case MixedFourBytes:
718      if (charcode < 0x100) {
719        CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes;
720        int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges);
721        if (iSize == 0) {
722          iSize = 1;
723        }
724        if (iSize > 1) {
725          FXSYS_memset(str, 0, sizeof(uint8_t) * iSize);
726        }
727        str[iSize - 1] = (uint8_t)charcode;
728        return iSize;
729      }
730      if (charcode < 0x10000) {
731        str[0] = (uint8_t)(charcode >> 8);
732        str[1] = (uint8_t)charcode;
733        return 2;
734      }
735      if (charcode < 0x1000000) {
736        str[0] = (uint8_t)(charcode >> 16);
737        str[1] = (uint8_t)(charcode >> 8);
738        str[2] = (uint8_t)charcode;
739        return 3;
740      }
741      str[0] = (uint8_t)(charcode >> 24);
742      str[1] = (uint8_t)(charcode >> 16);
743      str[2] = (uint8_t)(charcode >> 8);
744      str[3] = (uint8_t)charcode;
745      return 4;
746  }
747  return 0;
748}
749
750CPDF_CID2UnicodeMap::CPDF_CID2UnicodeMap() {
751  m_EmbeddedCount = 0;
752}
753
754CPDF_CID2UnicodeMap::~CPDF_CID2UnicodeMap() {}
755
756bool CPDF_CID2UnicodeMap::IsLoaded() {
757  return m_EmbeddedCount != 0;
758}
759
760FX_WCHAR CPDF_CID2UnicodeMap::UnicodeFromCID(uint16_t CID) {
761  if (m_Charset == CIDSET_UNICODE) {
762    return CID;
763  }
764  if (CID < m_EmbeddedCount) {
765    return m_pEmbeddedMap[CID];
766  }
767  return 0;
768}
769
770void CPDF_CID2UnicodeMap::Load(CPDF_CMapManager* pMgr,
771                               CIDSet charset,
772                               bool bPromptCJK) {
773  m_Charset = charset;
774
775  CPDF_FontGlobals* pFontGlobals =
776      CPDF_ModuleMgr::Get()->GetPageModule()->GetFontGlobals();
777  m_pEmbeddedMap = pFontGlobals->m_EmbeddedToUnicodes[charset].m_pMap;
778  m_EmbeddedCount = pFontGlobals->m_EmbeddedToUnicodes[charset].m_Count;
779}
780
781CIDSet CharsetFromOrdering(const CFX_ByteStringC& ordering) {
782  for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) {
783    if (ordering == g_CharsetNames[charset])
784      return CIDSetFromSizeT(charset);
785  }
786  return CIDSET_UNKNOWN;
787}
788