1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/parser/fpdf_parser_utility.h"
8
9#include "core/fpdfapi/parser/cpdf_array.h"
10#include "core/fpdfapi/parser/cpdf_boolean.h"
11#include "core/fpdfapi/parser/cpdf_dictionary.h"
12#include "core/fpdfapi/parser/cpdf_number.h"
13#include "core/fpdfapi/parser/cpdf_reference.h"
14#include "core/fpdfapi/parser/cpdf_stream.h"
15#include "core/fpdfapi/parser/cpdf_stream_acc.h"
16#include "core/fpdfapi/parser/cpdf_string.h"
17#include "core/fpdfapi/parser/fpdf_parser_decode.h"
18#include "core/fxcrt/fx_ext.h"
19
// Character classification table, indexed by 8-bit character code.
// Every entry holds exactly one of:
//   'W' - whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff
//   'N' - numeric: 0123456789+-.
//   'D' - delimiter: %()/<>[]{}
//   'R' - regular: any other byte.
// Rows below hold eight entries each; the comment on a row gives the code of
// its first entry and, for ASCII rows, the characters it covers.
const char PDF_CharType[256] = {
    // 0x00: NUL SOH STX ETX EOT ENQ ACK BEL
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x08: BS HT LF VT FF CR SO SI
    'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',
    // 0x10: DLE DC1 DC2 DC3 DC4 NAK SYN ETB
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x18: CAN EM SUB ESC FS GS RS US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x20: SP ! " # $ % & '
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R',
    // 0x28: ( ) * + , - . /
    'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',
    // 0x30: 0 1 2 3 4 5 6 7
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    // 0x38: 8 9 : ; < = > ?
    'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',
    // 0x40: @ A B C D E F G
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x48: H I J K L M N O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x50: P Q R S T U V W
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x58: X Y Z [ backslash ] ^ _
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x60: ` a b c d e f g
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x68: h i j k l m n o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x70: p q r s t u v w
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x78: x y z { | } ~ DEL
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x80 (first entry, 0x80, is whitespace)
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x88
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x90
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x98
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xA0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xA8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xB0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xB8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xC0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xC8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xD0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xD8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xE0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xE8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xF0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xF8 (last entry, 0xff, is whitespace)
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
71
72int32_t GetHeaderOffset(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile) {
73  const size_t kBufSize = 4;
74  uint8_t buf[kBufSize];
75  for (int32_t offset = 0; offset <= 1024; ++offset) {
76    if (!pFile->ReadBlock(buf, offset, kBufSize))
77      return -1;
78
79    if (memcmp(buf, "%PDF", 4) == 0)
80      return offset;
81  }
82  return -1;
83}
84
85int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteString& key) {
86  CPDF_Number* pObj = ToNumber(pDict->GetObjectFor(key));
87  return pObj ? pObj->GetInteger() : 0;
88}
89
90CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {
91  if (bstr.Find('#') == -1)
92    return CFX_ByteString(bstr);
93
94  int size = bstr.GetLength();
95  CFX_ByteString result;
96  FX_CHAR* pDestStart = result.GetBuffer(size);
97  FX_CHAR* pDest = pDestStart;
98  for (int i = 0; i < size; i++) {
99    if (bstr[i] == '#' && i < size - 2) {
100      *pDest++ =
101          FXSYS_toHexDigit(bstr[i + 1]) * 16 + FXSYS_toHexDigit(bstr[i + 2]);
102      i += 2;
103    } else {
104      *pDest++ = bstr[i];
105    }
106  }
107  result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart));
108  return result;
109}
110
111CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) {
112  if (orig.Find('#') == -1)
113    return orig;
114  return PDF_NameDecode(orig.AsStringC());
115}
116
117CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) {
118  uint8_t* src_buf = (uint8_t*)orig.c_str();
119  int src_len = orig.GetLength();
120  int dest_len = 0;
121  int i;
122  for (i = 0; i < src_len; i++) {
123    uint8_t ch = src_buf[i];
124    if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
125        PDFCharIsDelimiter(ch)) {
126      dest_len += 3;
127    } else {
128      dest_len++;
129    }
130  }
131  if (dest_len == src_len)
132    return orig;
133
134  CFX_ByteString res;
135  FX_CHAR* dest_buf = res.GetBuffer(dest_len);
136  dest_len = 0;
137  for (i = 0; i < src_len; i++) {
138    uint8_t ch = src_buf[i];
139    if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
140        PDFCharIsDelimiter(ch)) {
141      dest_buf[dest_len++] = '#';
142      dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16];
143      dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16];
144    } else {
145      dest_buf[dest_len++] = ch;
146    }
147  }
148  dest_buf[dest_len] = 0;
149  res.ReleaseBuffer();
150  return res;
151}
152
153CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) {
154  if (!pObj) {
155    buf << " null";
156    return buf;
157  }
158  switch (pObj->GetType()) {
159    case CPDF_Object::NULLOBJ:
160      buf << " null";
161      break;
162    case CPDF_Object::BOOLEAN:
163    case CPDF_Object::NUMBER:
164      buf << " " << pObj->GetString();
165      break;
166    case CPDF_Object::STRING:
167      buf << PDF_EncodeString(pObj->GetString(), pObj->AsString()->IsHex());
168      break;
169    case CPDF_Object::NAME: {
170      CFX_ByteString str = pObj->GetString();
171      buf << "/" << PDF_NameEncode(str);
172      break;
173    }
174    case CPDF_Object::REFERENCE: {
175      buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
176      break;
177    }
178    case CPDF_Object::ARRAY: {
179      const CPDF_Array* p = pObj->AsArray();
180      buf << "[";
181      for (size_t i = 0; i < p->GetCount(); i++) {
182        CPDF_Object* pElement = p->GetObjectAt(i);
183        if (pElement && !pElement->IsInline()) {
184          buf << " " << pElement->GetObjNum() << " 0 R";
185        } else {
186          buf << pElement;
187        }
188      }
189      buf << "]";
190      break;
191    }
192    case CPDF_Object::DICTIONARY: {
193      const CPDF_Dictionary* p = pObj->AsDictionary();
194      buf << "<<";
195      for (const auto& it : *p) {
196        const CFX_ByteString& key = it.first;
197        CPDF_Object* pValue = it.second.get();
198        buf << "/" << PDF_NameEncode(key);
199        if (pValue && !pValue->IsInline()) {
200          buf << " " << pValue->GetObjNum() << " 0 R ";
201        } else {
202          buf << pValue;
203        }
204      }
205      buf << ">>";
206      break;
207    }
208    case CPDF_Object::STREAM: {
209      const CPDF_Stream* p = pObj->AsStream();
210      buf << p->GetDict() << "stream\r\n";
211      CPDF_StreamAcc acc;
212      acc.LoadAllData(p, true);
213      buf.AppendBlock(acc.GetData(), acc.GetSize());
214      buf << "\r\nendstream";
215      break;
216    }
217    default:
218      ASSERT(false);
219      break;
220  }
221  return buf;
222}
223