1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/font/cpdf_cmapparser.h"
8
9#include <vector>
10
11#include "core/fpdfapi/cmaps/cmap_int.h"
12#include "core/fpdfapi/cpdf_modulemgr.h"
13#include "core/fpdfapi/page/cpdf_pagemodule.h"
14#include "core/fpdfapi/parser/cpdf_array.h"
15#include "core/fpdfapi/parser/cpdf_dictionary.h"
16#include "core/fpdfapi/parser/cpdf_simple_parser.h"
17#include "core/fxcrt/fx_extension.h"
18#include "core/fxge/fx_freetype.h"
19#include "third_party/base/logging.h"
20
21namespace {
22
23const char* const g_CharsetNames[CIDSET_NUM_SETS] = {nullptr,  "GB1",    "CNS1",
24                                                     "Japan1", "Korea1", "UCS"};
25
26CIDSet CIDSetFromSizeT(size_t index) {
27  if (index >= CIDSET_NUM_SETS) {
28    NOTREACHED();
29    return CIDSET_UNKNOWN;
30  }
31  return static_cast<CIDSet>(index);
32}
33
34ByteStringView CMap_GetString(const ByteStringView& word) {
35  if (word.GetLength() <= 2)
36    return ByteStringView();
37  return word.Right(word.GetLength() - 2);
38}
39
40}  // namespace
41
42CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap)
43    : m_pCMap(pCMap), m_Status(0), m_CodeSeq(0) {}
44
45CPDF_CMapParser::~CPDF_CMapParser() {}
46
47void CPDF_CMapParser::ParseWord(const ByteStringView& word) {
48  if (word.IsEmpty()) {
49    return;
50  }
51  if (word == "begincidchar") {
52    m_Status = 1;
53    m_CodeSeq = 0;
54  } else if (word == "begincidrange") {
55    m_Status = 2;
56    m_CodeSeq = 0;
57  } else if (word == "endcidrange" || word == "endcidchar") {
58    m_Status = 0;
59  } else if (word == "/WMode") {
60    m_Status = 6;
61  } else if (word == "/Registry") {
62    m_Status = 3;
63  } else if (word == "/Ordering") {
64    m_Status = 4;
65  } else if (word == "/Supplement") {
66    m_Status = 5;
67  } else if (word == "begincodespacerange") {
68    m_Status = 7;
69    m_CodeSeq = 0;
70  } else if (word == "usecmap") {
71  } else if (m_Status == 1 || m_Status == 2) {
72    m_CodePoints[m_CodeSeq] = GetCode(word);
73    m_CodeSeq++;
74    uint32_t StartCode, EndCode;
75    uint16_t StartCID;
76    if (m_Status == 1) {
77      if (m_CodeSeq < 2) {
78        return;
79      }
80      EndCode = StartCode = m_CodePoints[0];
81      StartCID = (uint16_t)m_CodePoints[1];
82    } else {
83      if (m_CodeSeq < 3) {
84        return;
85      }
86      StartCode = m_CodePoints[0];
87      EndCode = m_CodePoints[1];
88      StartCID = (uint16_t)m_CodePoints[2];
89    }
90    if (EndCode < 0x10000) {
91      for (uint32_t code = StartCode; code <= EndCode; code++) {
92        m_pCMap->SetDirectCharcodeToCIDTable(
93            code, static_cast<uint16_t>(StartCID + code - StartCode));
94      }
95    } else {
96      m_AdditionalCharcodeToCIDMappings.push_back(
97          {StartCode, EndCode, StartCID});
98    }
99    m_CodeSeq = 0;
100  } else if (m_Status == 3) {
101    m_Status = 0;
102  } else if (m_Status == 4) {
103    m_pCMap->SetCharset(CharsetFromOrdering(CMap_GetString(word)));
104    m_Status = 0;
105  } else if (m_Status == 5) {
106    m_Status = 0;
107  } else if (m_Status == 6) {
108    m_pCMap->SetVertical(GetCode(word) != 0);
109    m_Status = 0;
110  } else if (m_Status == 7) {
111    if (word == "endcodespacerange") {
112      size_t nSegs = m_CodeRanges.size();
113      if (nSegs == 1) {
114        m_pCMap->SetCodingScheme((m_CodeRanges[0].m_CharSize == 2)
115                                     ? CPDF_CMap::TwoBytes
116                                     : CPDF_CMap::OneByte);
117      } else if (nSegs > 1) {
118        m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
119        m_pCMap->SetMixedFourByteLeadingRanges(m_CodeRanges);
120      }
121      m_Status = 0;
122    } else {
123      if (word.GetLength() == 0 || word[0] != '<') {
124        return;
125      }
126      if (m_CodeSeq % 2) {
127        CPDF_CMap::CodeRange range;
128        if (GetCodeRange(range, m_LastWord.AsStringView(), word))
129          m_CodeRanges.push_back(range);
130      }
131      m_CodeSeq++;
132    }
133  }
134  m_LastWord = word;
135}
136
137uint32_t CPDF_CMapParser::GetCode(const ByteStringView& word) const {
138  if (word.IsEmpty())
139    return 0;
140
141  pdfium::base::CheckedNumeric<uint32_t> num = 0;
142  if (word[0] == '<') {
143    for (size_t i = 1; i < word.GetLength() && std::isxdigit(word[i]); ++i) {
144      num = num * 16 + FXSYS_HexCharToInt(word[i]);
145      if (!num.IsValid())
146        return 0;
147    }
148    return num.ValueOrDie();
149  }
150
151  for (size_t i = 0; i < word.GetLength() && std::isdigit(word[i]); ++i) {
152    num = num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word[i]));
153    if (!num.IsValid())
154      return 0;
155  }
156  return num.ValueOrDie();
157}
158
159bool CPDF_CMapParser::GetCodeRange(CPDF_CMap::CodeRange& range,
160                                   const ByteStringView& first,
161                                   const ByteStringView& second) const {
162  if (first.GetLength() == 0 || first[0] != '<')
163    return false;
164
165  size_t i;
166  for (i = 1; i < first.GetLength(); ++i) {
167    if (first[i] == '>') {
168      break;
169    }
170  }
171  range.m_CharSize = (i - 1) / 2;
172  if (range.m_CharSize > 4)
173    return false;
174
175  for (i = 0; i < range.m_CharSize; ++i) {
176    uint8_t digit1 = first[i * 2 + 1];
177    uint8_t digit2 = first[i * 2 + 2];
178    range.m_Lower[i] =
179        FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
180  }
181
182  size_t size = second.GetLength();
183  for (i = 0; i < range.m_CharSize; ++i) {
184    uint8_t digit1 = (i * 2 + 1 < size) ? second[i * 2 + 1] : '0';
185    uint8_t digit2 = (i * 2 + 2 < size) ? second[i * 2 + 2] : '0';
186    range.m_Upper[i] =
187        FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
188  }
189  return true;
190}
191
192// static
193CIDSet CPDF_CMapParser::CharsetFromOrdering(const ByteStringView& ordering) {
194  for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) {
195    if (ordering == g_CharsetNames[charset])
196      return CIDSetFromSizeT(charset);
197  }
198  return CIDSET_UNKNOWN;
199}
200