1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/parser/fpdf_parser_decode.h"
8
9#include <limits.h>
10
11#include <algorithm>
12#include <sstream>
13#include <utility>
14#include <vector>
15
16#include "core/fpdfapi/cpdf_modulemgr.h"
17#include "core/fpdfapi/parser/cpdf_array.h"
18#include "core/fpdfapi/parser/cpdf_dictionary.h"
19#include "core/fpdfapi/parser/fpdf_parser_utility.h"
20#include "core/fxcodec/codec/ccodec_faxmodule.h"
21#include "core/fxcodec/codec/ccodec_flatemodule.h"
22#include "core/fxcodec/codec/ccodec_scanlinedecoder.h"
23#include "core/fxcodec/fx_codec.h"
24#include "core/fxcrt/fx_extension.h"
25#include "third_party/base/numerics/safe_math.h"
26
27namespace {
28
// Size limit (20 MiB) for run-length output: RunLengthDecode() refuses any
// stream whose decoded size would be this many bytes or more.
constexpr uint32_t kMaxStreamSize = 20 * 1024 * 1024;
30
// Assembles one 16-bit code unit from the two bytes at |bytes|. With |bBE| set
// the first byte is the high byte (big-endian); otherwise the second byte is.
uint16_t GetUnicodeFromBytes(const uint8_t* bytes, bool bBE) {
  const uint8_t high = bBE ? bytes[0] : bytes[1];
  const uint8_t low = bBE ? bytes[1] : bytes[0];
  return static_cast<uint16_t>((high << 8) | low);
}
34
35bool CheckFlateDecodeParams(int Colors, int BitsPerComponent, int Columns) {
36  if (Colors < 0 || BitsPerComponent < 0 || Columns < 0)
37    return false;
38
39  pdfium::base::CheckedNumeric<int> check = Columns;
40  check *= Colors;
41  check *= BitsPerComponent;
42  if (!check.IsValid())
43    return false;
44
45  return check.ValueOrDie() <= INT_MAX - 7;
46}
47
48}  // namespace
49
// Maps each single-byte code (the array index) to a 16-bit character value.
// PDF_DecodeText() indexes this table directly; PDF_EncodeText() searches it
// for the first index holding a given character. Codes 0x7f, 0x9f and 0xad
// map to 0x0000. Rows are 8 entries wide; the trailing comment gives the code
// of the first entry in the row.
// clang-format off
const uint16_t PDFDocEncoding[256] = {
    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,  // 0x00
    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,  // 0x08
    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,  // 0x10
    0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,  // 0x18
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,  // 0x20
    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,  // 0x28
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,  // 0x30
    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,  // 0x38
    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,  // 0x40
    0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,  // 0x48
    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,  // 0x50
    0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,  // 0x58
    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,  // 0x60
    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,  // 0x68
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,  // 0x70
    0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,  // 0x78
    0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,  // 0x80
    0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,  // 0x88
    0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,  // 0x90
    0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000,  // 0x98
    0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,  // 0xa0
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af,  // 0xa8
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,  // 0xb0
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,  // 0xb8
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,  // 0xc0
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,  // 0xc8
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,  // 0xd0
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,  // 0xd8
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,  // 0xe0
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,  // 0xe8
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,  // 0xf0
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,  // 0xf8
};
// clang-format on
80
81uint32_t A85Decode(const uint8_t* src_buf,
82                   uint32_t src_size,
83                   uint8_t** dest_buf,
84                   uint32_t* dest_size) {
85  *dest_size = 0;
86  *dest_buf = nullptr;
87  if (src_size == 0)
88    return 0;
89
90  // Count legal characters and zeros.
91  uint32_t zcount = 0;
92  uint32_t pos = 0;
93  while (pos < src_size) {
94    uint8_t ch = src_buf[pos];
95    if (ch == 'z') {
96      zcount++;
97    } else if ((ch < '!' || ch > 'u') && !PDFCharIsLineEnding(ch) &&
98               ch != ' ' && ch != '\t') {
99      break;
100    }
101    pos++;
102  }
103  // No content to decode.
104  if (pos == 0)
105    return 0;
106
107  // Count the space needed to contain non-zero characters. The encoding ratio
108  // of Ascii85 is 4:5.
109  uint32_t space_for_non_zeroes = (pos - zcount) / 5 * 4 + 4;
110  if (zcount > (UINT_MAX - space_for_non_zeroes) / 4)
111    return FX_INVALID_OFFSET;
112
113  *dest_buf = FX_Alloc(uint8_t, zcount * 4 + space_for_non_zeroes);
114  size_t state = 0;
115  uint32_t res = 0;
116  pos = 0;
117  while (pos < src_size) {
118    uint8_t ch = src_buf[pos++];
119    if (PDFCharIsLineEnding(ch) || ch == ' ' || ch == '\t')
120      continue;
121
122    if (ch == 'z') {
123      memset(*dest_buf + *dest_size, 0, 4);
124      state = 0;
125      res = 0;
126      *dest_size += 4;
127      continue;
128    }
129
130    // Check for the end or illegal character.
131    if (ch < '!' || ch > 'u')
132      break;
133
134    res = res * 85 + ch - 33;
135    if (state < 4) {
136      ++state;
137      continue;
138    }
139
140    for (size_t i = 0; i < 4; ++i) {
141      (*dest_buf)[(*dest_size)++] = static_cast<uint8_t>(res >> (3 - i) * 8);
142    }
143    state = 0;
144    res = 0;
145  }
146  // Handle partial group.
147  if (state) {
148    for (size_t i = state; i < 5; ++i)
149      res = res * 85 + 84;
150    for (size_t i = 0; i < state - 1; ++i)
151      (*dest_buf)[(*dest_size)++] = static_cast<uint8_t>(res >> (3 - i) * 8);
152  }
153  if (pos < src_size && src_buf[pos] == '>')
154    ++pos;
155  return pos;
156}
157
158uint32_t HexDecode(const uint8_t* src_buf,
159                   uint32_t src_size,
160                   uint8_t** dest_buf,
161                   uint32_t* dest_size) {
162  *dest_size = 0;
163  if (src_size == 0) {
164    *dest_buf = nullptr;
165    return 0;
166  }
167
168  uint32_t i = 0;
169  // Find the end of data.
170  while (i < src_size && src_buf[i] != '>')
171    ++i;
172
173  *dest_buf = FX_Alloc(uint8_t, i / 2 + 1);
174  bool bFirst = true;
175  for (i = 0; i < src_size; ++i) {
176    uint8_t ch = src_buf[i];
177    if (PDFCharIsLineEnding(ch) || ch == ' ' || ch == '\t')
178      continue;
179
180    if (ch == '>') {
181      ++i;
182      break;
183    }
184    if (!std::isxdigit(ch))
185      continue;
186
187    int digit = FXSYS_HexCharToInt(ch);
188    if (bFirst)
189      (*dest_buf)[*dest_size] = digit * 16;
190    else
191      (*dest_buf)[(*dest_size)++] += digit;
192    bFirst = !bFirst;
193  }
194  if (!bFirst)
195    ++(*dest_size);
196  return i;
197}
198
199uint32_t RunLengthDecode(const uint8_t* src_buf,
200                         uint32_t src_size,
201                         uint8_t** dest_buf,
202                         uint32_t* dest_size) {
203  uint32_t i = 0;
204  *dest_size = 0;
205  while (i < src_size) {
206    if (src_buf[i] == 128)
207      break;
208
209    uint32_t old = *dest_size;
210    if (src_buf[i] < 128) {
211      *dest_size += src_buf[i] + 1;
212      if (*dest_size < old)
213        return FX_INVALID_OFFSET;
214      i += src_buf[i] + 2;
215    } else {
216      *dest_size += 257 - src_buf[i];
217      if (*dest_size < old)
218        return FX_INVALID_OFFSET;
219      i += 2;
220    }
221  }
222  if (*dest_size >= kMaxStreamSize)
223    return FX_INVALID_OFFSET;
224
225  *dest_buf = FX_Alloc(uint8_t, *dest_size);
226  i = 0;
227  int dest_count = 0;
228  while (i < src_size) {
229    if (src_buf[i] == 128)
230      break;
231
232    if (src_buf[i] < 128) {
233      uint32_t copy_len = src_buf[i] + 1;
234      uint32_t buf_left = src_size - i - 1;
235      if (buf_left < copy_len) {
236        uint32_t delta = copy_len - buf_left;
237        copy_len = buf_left;
238        memset(*dest_buf + dest_count + copy_len, '\0', delta);
239      }
240      memcpy(*dest_buf + dest_count, src_buf + i + 1, copy_len);
241      dest_count += src_buf[i] + 1;
242      i += src_buf[i] + 2;
243    } else {
244      int fill = 0;
245      if (i < src_size - 1)
246        fill = src_buf[i + 1];
247      memset(*dest_buf + dest_count, fill, 257 - src_buf[i]);
248      dest_count += 257 - src_buf[i];
249      i += 2;
250    }
251  }
252  return std::min(i + 1, src_size);
253}
254
255std::unique_ptr<CCodec_ScanlineDecoder> FPDFAPI_CreateFaxDecoder(
256    const uint8_t* src_buf,
257    uint32_t src_size,
258    int width,
259    int height,
260    const CPDF_Dictionary* pParams) {
261  int K = 0;
262  bool EndOfLine = false;
263  bool ByteAlign = false;
264  bool BlackIs1 = false;
265  int Columns = 1728;
266  int Rows = 0;
267  if (pParams) {
268    K = pParams->GetIntegerFor("K");
269    EndOfLine = !!pParams->GetIntegerFor("EndOfLine");
270    ByteAlign = !!pParams->GetIntegerFor("EncodedByteAlign");
271    BlackIs1 = !!pParams->GetIntegerFor("BlackIs1");
272    Columns = pParams->GetIntegerFor("Columns", 1728);
273    Rows = pParams->GetIntegerFor("Rows");
274    if (Rows > USHRT_MAX)
275      Rows = 0;
276  }
277  return CPDF_ModuleMgr::Get()->GetFaxModule()->CreateDecoder(
278      src_buf, src_size, width, height, K, EndOfLine, ByteAlign, BlackIs1,
279      Columns, Rows);
280}
281
282std::unique_ptr<CCodec_ScanlineDecoder> FPDFAPI_CreateFlateDecoder(
283    const uint8_t* src_buf,
284    uint32_t src_size,
285    int width,
286    int height,
287    int nComps,
288    int bpc,
289    const CPDF_Dictionary* pParams) {
290  int predictor = 0;
291  int Colors = 0;
292  int BitsPerComponent = 0;
293  int Columns = 0;
294  if (pParams) {
295    predictor = pParams->GetIntegerFor("Predictor");
296    Colors = pParams->GetIntegerFor("Colors", 1);
297    BitsPerComponent = pParams->GetIntegerFor("BitsPerComponent", 8);
298    Columns = pParams->GetIntegerFor("Columns", 1);
299    if (!CheckFlateDecodeParams(Colors, BitsPerComponent, Columns))
300      return nullptr;
301  }
302  return CPDF_ModuleMgr::Get()->GetFlateModule()->CreateDecoder(
303      src_buf, src_size, width, height, nComps, bpc, predictor, Colors,
304      BitsPerComponent, Columns);
305}
306
307uint32_t FPDFAPI_FlateOrLZWDecode(bool bLZW,
308                                  const uint8_t* src_buf,
309                                  uint32_t src_size,
310                                  CPDF_Dictionary* pParams,
311                                  uint32_t estimated_size,
312                                  uint8_t** dest_buf,
313                                  uint32_t* dest_size) {
314  int predictor = 0;
315  int Colors = 0;
316  int BitsPerComponent = 0;
317  int Columns = 0;
318  bool bEarlyChange = true;
319  if (pParams) {
320    predictor = pParams->GetIntegerFor("Predictor");
321    bEarlyChange = !!pParams->GetIntegerFor("EarlyChange", 1);
322    Colors = pParams->GetIntegerFor("Colors", 1);
323    BitsPerComponent = pParams->GetIntegerFor("BitsPerComponent", 8);
324    Columns = pParams->GetIntegerFor("Columns", 1);
325    if (!CheckFlateDecodeParams(Colors, BitsPerComponent, Columns))
326      return FX_INVALID_OFFSET;
327  }
328  return CPDF_ModuleMgr::Get()->GetFlateModule()->FlateOrLZWDecode(
329      bLZW, src_buf, src_size, bEarlyChange, predictor, Colors,
330      BitsPerComponent, Columns, estimated_size, dest_buf, dest_size);
331}
332
333bool PDF_DataDecode(const uint8_t* src_buf,
334                    uint32_t src_size,
335                    const CPDF_Dictionary* pDict,
336                    uint32_t last_estimated_size,
337                    bool bImageAcc,
338                    uint8_t** dest_buf,
339                    uint32_t* dest_size,
340                    ByteString* ImageEncoding,
341                    CPDF_Dictionary** pImageParms) {
342  CPDF_Object* pDecoder = pDict ? pDict->GetDirectObjectFor("Filter") : nullptr;
343  if (!pDecoder || (!pDecoder->IsArray() && !pDecoder->IsName()))
344    return false;
345
346  CPDF_Object* pParams =
347      pDict ? pDict->GetDirectObjectFor("DecodeParms") : nullptr;
348
349  std::vector<std::pair<ByteString, CPDF_Object*>> DecoderArray;
350  if (CPDF_Array* pDecoders = pDecoder->AsArray()) {
351    CPDF_Array* pParamsArray = ToArray(pParams);
352    for (size_t i = 0; i < pDecoders->GetCount(); ++i) {
353      DecoderArray.push_back(
354          {pDecoders->GetStringAt(i),
355           pParamsArray ? pParamsArray->GetDictAt(i) : nullptr});
356    }
357  } else {
358    DecoderArray.push_back(
359        {pDecoder->GetString(), pParams ? pParams->GetDict() : nullptr});
360  }
361  uint8_t* last_buf = const_cast<uint8_t*>(src_buf);
362  uint32_t last_size = src_size;
363  size_t nSize = DecoderArray.size();
364  for (size_t i = 0; i < nSize; ++i) {
365    int estimated_size = i == nSize - 1 ? last_estimated_size : 0;
366    ByteString decoder = DecoderArray[i].first;
367    CPDF_Dictionary* pParam = ToDictionary(DecoderArray[i].second);
368    uint8_t* new_buf = nullptr;
369    uint32_t new_size = 0xFFFFFFFF;
370    uint32_t offset = FX_INVALID_OFFSET;
371    if (decoder == "Crypt")
372      continue;
373    if (decoder == "FlateDecode" || decoder == "Fl") {
374      if (bImageAcc && i == nSize - 1) {
375        *ImageEncoding = "FlateDecode";
376        *dest_buf = last_buf;
377        *dest_size = last_size;
378        *pImageParms = pParam;
379        return true;
380      }
381      offset = FPDFAPI_FlateOrLZWDecode(false, last_buf, last_size, pParam,
382                                        estimated_size, &new_buf, &new_size);
383    } else if (decoder == "LZWDecode" || decoder == "LZW") {
384      offset = FPDFAPI_FlateOrLZWDecode(true, last_buf, last_size, pParam,
385                                        estimated_size, &new_buf, &new_size);
386    } else if (decoder == "ASCII85Decode" || decoder == "A85") {
387      offset = A85Decode(last_buf, last_size, &new_buf, &new_size);
388    } else if (decoder == "ASCIIHexDecode" || decoder == "AHx") {
389      offset = HexDecode(last_buf, last_size, &new_buf, &new_size);
390    } else if (decoder == "RunLengthDecode" || decoder == "RL") {
391      if (bImageAcc && i == nSize - 1) {
392        *ImageEncoding = "RunLengthDecode";
393        *dest_buf = last_buf;
394        *dest_size = last_size;
395        *pImageParms = pParam;
396        return true;
397      }
398      offset = RunLengthDecode(last_buf, last_size, &new_buf, &new_size);
399    } else {
400      // If we get here, assume it's an image decoder.
401      if (decoder == "DCT")
402        decoder = "DCTDecode";
403      else if (decoder == "CCF")
404        decoder = "CCITTFaxDecode";
405      *ImageEncoding = decoder;
406      *pImageParms = pParam;
407      *dest_buf = last_buf;
408      *dest_size = last_size;
409      return true;
410    }
411    if (last_buf != src_buf)
412      FX_Free(last_buf);
413    if (offset == FX_INVALID_OFFSET) {
414      FX_Free(new_buf);
415      return false;
416    }
417    last_buf = new_buf;
418    last_size = new_size;
419  }
420  ImageEncoding->clear();
421  *pImageParms = nullptr;
422  *dest_buf = last_buf;
423  *dest_size = last_size;
424  return true;
425}
426
427WideString PDF_DecodeText(const uint8_t* src_data, uint32_t src_len) {
428  WideString result;
429  if (src_len >= 2 && ((src_data[0] == 0xfe && src_data[1] == 0xff) ||
430                       (src_data[0] == 0xff && src_data[1] == 0xfe))) {
431    uint32_t max_chars = (src_len - 2) / 2;
432    if (!max_chars)
433      return result;
434
435    bool bBE = src_data[0] == 0xfe || (src_data[0] == 0xff && !src_data[2]);
436    wchar_t* dest_buf = result.GetBuffer(max_chars);
437    const uint8_t* uni_str = src_data + 2;
438    int dest_pos = 0;
439    for (uint32_t i = 0; i < max_chars * 2; i += 2) {
440      uint16_t unicode = GetUnicodeFromBytes(uni_str + i, bBE);
441      if (unicode != 0x1b) {
442        dest_buf[dest_pos++] = unicode;
443        continue;
444      }
445
446      i += 2;
447      while (i < max_chars * 2) {
448        uint16_t unicode2 = GetUnicodeFromBytes(uni_str + i, bBE);
449        i += 2;
450        if (unicode2 == 0x1b)
451          break;
452      }
453    }
454    result.ReleaseBuffer(dest_pos);
455  } else {
456    wchar_t* dest_buf = result.GetBuffer(src_len);
457    for (uint32_t i = 0; i < src_len; ++i)
458      dest_buf[i] = PDFDocEncoding[src_data[i]];
459    result.ReleaseBuffer(src_len);
460  }
461  return result;
462}
463
464WideString PDF_DecodeText(const ByteString& bstr) {
465  return PDF_DecodeText(reinterpret_cast<const uint8_t*>(bstr.c_str()),
466                        bstr.GetLength());
467}
468
469ByteString PDF_EncodeText(const wchar_t* pString, int len) {
470  if (len == -1)
471    len = wcslen(pString);
472
473  ByteString result;
474  char* dest_buf1 = result.GetBuffer(len);
475  int i;
476  for (i = 0; i < len; ++i) {
477    int code;
478    for (code = 0; code < 256; ++code) {
479      if (PDFDocEncoding[code] == pString[i])
480        break;
481    }
482
483    if (code == 256)
484      break;
485
486    dest_buf1[i] = code;
487  }
488  result.ReleaseBuffer(i);
489  if (i == len)
490    return result;
491
492  if (len > INT_MAX / 2 - 1) {
493    result.ReleaseBuffer(0);
494    return result;
495  }
496
497  int encLen = len * 2 + 2;
498
499  uint8_t* dest_buf2 = reinterpret_cast<uint8_t*>(result.GetBuffer(encLen));
500  dest_buf2[0] = 0xfe;
501  dest_buf2[1] = 0xff;
502  dest_buf2 += 2;
503  for (int j = 0; j < len; ++j) {
504    *dest_buf2++ = pString[j] >> 8;
505    *dest_buf2++ = static_cast<uint8_t>(pString[j]);
506  }
507  result.ReleaseBuffer(encLen);
508  return result;
509}
510
511ByteString PDF_EncodeText(const WideString& str) {
512  return PDF_EncodeText(str.c_str(), str.GetLength());
513}
514
515ByteString PDF_EncodeString(const ByteString& src, bool bHex) {
516  std::ostringstream result;
517  int srclen = src.GetLength();
518  if (bHex) {
519    result << '<';
520    for (int i = 0; i < srclen; ++i) {
521      char buf[2];
522      FXSYS_IntToTwoHexChars(src[i], buf);
523      result << buf[0];
524      result << buf[1];
525    }
526    result << '>';
527    return ByteString(result);
528  }
529  result << '(';
530  for (int i = 0; i < srclen; ++i) {
531    uint8_t ch = src[i];
532    if (ch == 0x0a) {
533      result << "\\n";
534      continue;
535    }
536    if (ch == 0x0d) {
537      result << "\\r";
538      continue;
539    }
540    if (ch == ')' || ch == '\\' || ch == '(')
541      result << '\\';
542    result << static_cast<char>(ch);
543  }
544  result << ')';
545  return ByteString(result);
546}
547
548bool FlateEncode(const uint8_t* src_buf,
549                 uint32_t src_size,
550                 uint8_t** dest_buf,
551                 uint32_t* dest_size) {
552  CCodec_ModuleMgr* pEncoders = CPDF_ModuleMgr::Get()->GetCodecModule();
553  return pEncoders->GetFlateModule()->Encode(src_buf, src_size, dest_buf,
554                                             dest_size);
555}
556
557bool PngEncode(const uint8_t* src_buf,
558               uint32_t src_size,
559               uint8_t** dest_buf,
560               uint32_t* dest_size) {
561  CCodec_ModuleMgr* pEncoders = CPDF_ModuleMgr::Get()->GetCodecModule();
562  return pEncoders->GetFlateModule()->PngEncode(src_buf, src_size, dest_buf,
563                                                dest_size);
564}
565
566uint32_t FlateDecode(const uint8_t* src_buf,
567                     uint32_t src_size,
568                     uint8_t** dest_buf,
569                     uint32_t* dest_size) {
570  CCodec_ModuleMgr* pEncoders = CPDF_ModuleMgr::Get()->GetCodecModule();
571  return pEncoders->GetFlateModule()->FlateOrLZWDecode(
572      false, src_buf, src_size, false, 0, 0, 0, 0, 0, dest_buf, dest_size);
573}
574