1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "../../../include/fpdfapi/fpdf_parser.h"
// Character classification table, indexed by byte value (8 rows of 32
// entries = 256 entries). Each entry is one of four class letters:
//   'W' - whitespace: 0x00, 0x09 (TAB), 0x0A (LF), 0x0C (FF), 0x0D (CR),
//         0x20 (space). This table also marks 0x80 and 0xFF as 'W'.
//   'D' - delimiter: % ( ) / < > [ ] { }
//   'N' - numeric:   + - . and the digits 0-9
//   'R' - regular:   every other byte
// CPDF_SimpleParser::ParseWord and PDF_NameEncode in this file index it with
// an unsigned byte, so all 256 entries must be present.
extern const FX_LPCSTR _PDF_CharType =
    "WRRRRRRRRWWRWWRRRRRRRRRRRRRRRRRR"
    "WRRRRDRRDDRNRNNDNNNNNNNNNNRRDRDR"
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"
    "WRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRW";
// Fallback for builds where no included header has already defined MAX_PATH.
#ifndef MAX_PATH
#define MAX_PATH 4096
#endif
20CPDF_SimpleParser::CPDF_SimpleParser(FX_LPCBYTE pData, FX_DWORD dwSize)
21{
22    m_pData = pData;
23    m_dwSize = dwSize;
24    m_dwCurPos = 0;
25}
26CPDF_SimpleParser::CPDF_SimpleParser(FX_BSTR str)
27{
28    m_pData = str;
29    m_dwSize = str.GetLength();
30    m_dwCurPos = 0;
31}
// Scans the next token starting at m_dwCurPos and advances m_dwCurPos past
// the bytes it consumed.
//
// Outputs:
//   pStart - first byte of the token inside m_pData; NULL if the data ran out
//            while skipping whitespace/comments.
//   dwSize - token length in bytes; 0 if no token was recognized.
//   type   - PDFWORD_EOF, PDFWORD_NAME, PDFWORD_DELIMITER, PDFWORD_NUMBER or
//            PDFWORD_TEXT.
//
// Tokens are classified with _PDF_CharType: a '/' starts a name, any other
// delimiter is returned on its own ("<<" and ">>" as two-byte tokens), and a
// run of non-delimiter, non-whitespace bytes is a number if every byte is
// numeric-class, otherwise text.
void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& type)
{
    // Defaults describe "nothing found"; every early return below that
    // happens before these are overwritten reports end of data.
    pStart = NULL;
    dwSize = 0;
    type = PDFWORD_EOF;
    FX_BYTE ch;
    char chartype;
    // Phase 1: skip any mix of whitespace and '%' comments.
    while (1) {
        if (m_dwSize <= m_dwCurPos) {
            return;
        }
        ch = m_pData[m_dwCurPos++];
        chartype = _PDF_CharType[ch];
        while (chartype == 'W') {
            if (m_dwSize <= m_dwCurPos) {
                return;
            }
            ch = m_pData[m_dwCurPos++];
            chartype = _PDF_CharType[ch];
        }
        // First non-whitespace byte: anything but '%' starts a token.
        if (ch != '%') {
            break;
        }
        // Comment: discard everything up to and including the first CR or LF.
        while (1) {
            if (m_dwSize <= m_dwCurPos) {
                return;
            }
            ch = m_pData[m_dwCurPos++];
            if (ch == '\r' || ch == '\n') {
                break;
            }
        }
        chartype = _PDF_CharType[ch];
    }
    // ch has already been consumed, so the token begins one byte back.
    FX_DWORD start_pos = m_dwCurPos - 1;
    pStart = m_pData + start_pos;
    if (chartype == 'D') {
        if (ch == '/') {
            // Name: '/' followed by regular/numeric bytes, ended by the first
            // whitespace or delimiter byte (which is left unread).
            while (1) {
                // NOTE(review): if the name runs to the very end of the data
                // this returns with dwSize == 0 and type == PDFWORD_EOF, i.e.
                // the trailing name is reported as end of data - confirm that
                // this is intended.
                if (m_dwSize <= m_dwCurPos) {
                    return;
                }
                ch = m_pData[m_dwCurPos++];
                chartype = _PDF_CharType[ch];
                if (chartype != 'R' && chartype != 'N') {
                    m_dwCurPos --;
                    dwSize = m_dwCurPos - start_pos;
                    type = PDFWORD_NAME;
                    return;
                }
            }
        } else {
            // Any other delimiter is a one-byte token, except that a doubled
            // '<' or '>' is merged into a single two-byte token.
            type = PDFWORD_DELIMITER;
            dwSize = 1;
            if (ch == '<') {
                if (m_dwSize <= m_dwCurPos) {
                    return;
                }
                ch = m_pData[m_dwCurPos++];
                if (ch == '<') {
                    dwSize = 2;
                } else {
                    // Not "<<": un-read the look-ahead byte.
                    m_dwCurPos --;
                }
            } else if (ch == '>') {
                if (m_dwSize <= m_dwCurPos) {
                    return;
                }
                ch = m_pData[m_dwCurPos++];
                if (ch == '>') {
                    dwSize = 2;
                } else {
                    // Not ">>": un-read the look-ahead byte.
                    m_dwCurPos --;
                }
            }
        }
        return;
    }
    // Number or text: assume a number until a non-numeric byte is seen.
    type = PDFWORD_NUMBER;
    dwSize = 1;
    while (1) {
        // chartype here belongs to the byte most recently accepted into the
        // token (the first byte on the first pass).
        if (chartype != 'N') {
            type = PDFWORD_TEXT;
        }
        if (m_dwSize <= m_dwCurPos) {
            return;
        }
        ch = m_pData[m_dwCurPos++];
        chartype = _PDF_CharType[ch];
        if (chartype == 'D' || chartype == 'W') {
            // Terminator is not part of the token: un-read it.
            m_dwCurPos --;
            break;
        }
        dwSize ++;
    }
}
128CFX_ByteStringC CPDF_SimpleParser::GetWord()
129{
130    FX_LPCBYTE pStart;
131    FX_DWORD dwSize;
132    int type;
133    ParseWord(pStart, dwSize, type);
134    if (dwSize == 1 && pStart[0] == '<') {
135        while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
136            m_dwCurPos ++;
137        }
138        if (m_dwCurPos < m_dwSize) {
139            m_dwCurPos ++;
140        }
141        return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
142    } else if (dwSize == 1 && pStart[0] == '(') {
143        int level = 1;
144        while (m_dwCurPos < m_dwSize) {
145            if (m_pData[m_dwCurPos] == ')') {
146                level --;
147                if (level == 0) {
148                    break;
149                }
150            }
151            if (m_pData[m_dwCurPos] == '\\') {
152                if (m_dwSize <= m_dwCurPos) {
153                    break;
154                }
155                m_dwCurPos ++;
156            } else if (m_pData[m_dwCurPos] == '(') {
157                level ++;
158            }
159            if (m_dwSize <= m_dwCurPos) {
160                break;
161            }
162            m_dwCurPos ++;
163        }
164        if (m_dwCurPos < m_dwSize) {
165            m_dwCurPos ++;
166        }
167        return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
168    }
169    return CFX_ByteStringC(pStart, dwSize);
170}
171FX_BOOL CPDF_SimpleParser::SearchToken(FX_BSTR token)
172{
173    int token_len = token.GetLength();
174    while (m_dwCurPos < m_dwSize - token_len) {
175        if (FXSYS_memcmp32(m_pData + m_dwCurPos, token, token_len) == 0) {
176            break;
177        }
178        m_dwCurPos ++;
179    }
180    if (m_dwCurPos == m_dwSize - token_len) {
181        return FALSE;
182    }
183    m_dwCurPos += token_len;
184    return TRUE;
185}
186FX_BOOL CPDF_SimpleParser::SkipWord(FX_BSTR token)
187{
188    while (1) {
189        CFX_ByteStringC word = GetWord();
190        if (word.IsEmpty()) {
191            return FALSE;
192        }
193        if (word == token) {
194            return TRUE;
195        }
196    }
197    return FALSE;
198}
199FX_BOOL CPDF_SimpleParser::FindTagPair(FX_BSTR start_token, FX_BSTR end_token,
200                                       FX_DWORD& start_pos, FX_DWORD& end_pos)
201{
202    if (!start_token.IsEmpty()) {
203        if (!SkipWord(start_token)) {
204            return FALSE;
205        }
206        start_pos = m_dwCurPos;
207    }
208    while (1) {
209        end_pos = m_dwCurPos;
210        CFX_ByteStringC word = GetWord();
211        if (word.IsEmpty()) {
212            return FALSE;
213        }
214        if (word == end_token) {
215            return TRUE;
216        }
217    }
218    return FALSE;
219}
220FX_BOOL CPDF_SimpleParser::FindTagParam(FX_BSTR token, int nParams)
221{
222    nParams ++;
223    FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);
224    int buf_index = 0;
225    int buf_count = 0;
226    while (1) {
227        pBuf[buf_index++] = m_dwCurPos;
228        if (buf_index == nParams) {
229            buf_index = 0;
230        }
231        buf_count ++;
232        if (buf_count > nParams) {
233            buf_count = nParams;
234        }
235        CFX_ByteStringC word = GetWord();
236        if (word.IsEmpty()) {
237            FX_Free(pBuf);
238            return FALSE;
239        }
240        if (word == token) {
241            if (buf_count < nParams) {
242                continue;
243            }
244            m_dwCurPos = pBuf[buf_index];
245            FX_Free(pBuf);
246            return TRUE;
247        }
248    }
249    return FALSE;
250}
// Converts one hexadecimal digit character (0-9, a-f, A-F) to its value.
// Any other character yields 0.
static int _hex2dec(char ch)
{
    int digit = 0;
    if (ch >= 'A' && ch <= 'F') {
        digit = 10 + (ch - 'A');
    } else if (ch >= 'a' && ch <= 'f') {
        digit = 10 + (ch - 'a');
    } else if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    }
    return digit;
}
264CFX_ByteString PDF_NameDecode(FX_BSTR bstr)
265{
266    int size = bstr.GetLength();
267    FX_LPCSTR pSrc = bstr.GetCStr();
268    if (FXSYS_memchr(pSrc, '#', size) == NULL) {
269        return bstr;
270    }
271    CFX_ByteString result;
272    FX_LPSTR pDestStart = result.GetBuffer(size);
273    FX_LPSTR pDest = pDestStart;
274    for (int i = 0; i < size; i ++) {
275        if (pSrc[i] == '#' && i < size - 2) {
276            *pDest ++ = _hex2dec(pSrc[i + 1]) * 16 + _hex2dec(pSrc[i + 2]);
277            i += 2;
278        } else {
279            *pDest ++ = pSrc[i];
280        }
281    }
282    result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart));
283    return result;
284}
285CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig)
286{
287    if (FXSYS_memchr((FX_LPCSTR)orig, '#', orig.GetLength()) == NULL) {
288        return orig;
289    }
290    return PDF_NameDecode(CFX_ByteStringC(orig));
291}
292CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig)
293{
294    FX_LPBYTE src_buf = (FX_LPBYTE)(FX_LPCSTR)orig;
295    int src_len = orig.GetLength();
296    int dest_len = 0;
297    int i;
298    for (i = 0; i < src_len; i ++) {
299        FX_BYTE ch = src_buf[i];
300        if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' ||
301                _PDF_CharType[ch] == 'D') {
302            dest_len += 3;
303        } else {
304            dest_len ++;
305        }
306    }
307    if (dest_len == src_len) {
308        return orig;
309    }
310    CFX_ByteString res;
311    FX_LPSTR dest_buf = res.GetBuffer(dest_len);
312    dest_len = 0;
313    for (i = 0; i < src_len; i ++) {
314        FX_BYTE ch = src_buf[i];
315        if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' ||
316                _PDF_CharType[ch] == 'D') {
317            dest_buf[dest_len++] = '#';
318            dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16];
319            dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16];
320        } else {
321            dest_buf[dest_len++] = ch;
322        }
323    }
324    dest_buf[dest_len] = 0;
325    res.ReleaseBuffer();
326    return res;
327}
328CFX_ByteTextBuf& operator << (CFX_ByteTextBuf& buf, const CPDF_Object* pObj)
329{
330    if (pObj == NULL) {
331        buf << FX_BSTRC(" null");
332        return buf;
333    }
334    switch (pObj->GetType()) {
335        case PDFOBJ_NULL:
336            buf << FX_BSTRC(" null");
337            break;
338        case PDFOBJ_BOOLEAN:
339        case PDFOBJ_NUMBER:
340            buf << " " << pObj->GetString();
341            break;
342        case PDFOBJ_STRING: {
343                CFX_ByteString str = pObj->GetString();
344                FX_BOOL bHex = ((CPDF_String*)pObj)->IsHex();
345                buf << PDF_EncodeString(str, bHex);
346                break;
347            }
348        case PDFOBJ_NAME: {
349                CFX_ByteString str = pObj->GetString();
350                buf << FX_BSTRC("/") << PDF_NameEncode(str);
351                break;
352            }
353        case PDFOBJ_REFERENCE: {
354                CPDF_Reference* p = (CPDF_Reference*)pObj;
355                buf << " " << p->GetRefObjNum() << FX_BSTRC(" 0 R ");
356                break;
357            }
358        case PDFOBJ_ARRAY: {
359                CPDF_Array* p = (CPDF_Array*)pObj;
360                buf << FX_BSTRC("[");
361                for (FX_DWORD i = 0; i < p->GetCount(); i ++) {
362                    CPDF_Object* pElement = p->GetElement(i);
363                    if (pElement->GetObjNum()) {
364                        buf << " " << pElement->GetObjNum() << FX_BSTRC(" 0 R");
365                    } else {
366                        buf << pElement;
367                    }
368                }
369                buf << FX_BSTRC("]");
370                break;
371            }
372        case PDFOBJ_DICTIONARY: {
373                CPDF_Dictionary* p = (CPDF_Dictionary*)pObj;
374                buf << FX_BSTRC("<<");
375                FX_POSITION pos = p->GetStartPos();
376                while (pos) {
377                    CFX_ByteString key;
378                    CPDF_Object* pValue = p->GetNextElement(pos, key);
379                    buf << FX_BSTRC("/") << PDF_NameEncode(key);
380                    if (pValue->GetObjNum()) {
381                        buf << " " << pValue->GetObjNum() << FX_BSTRC(" 0 R ");
382                    } else {
383                        buf << pValue;
384                    }
385                }
386                buf << FX_BSTRC(">>");
387                break;
388            }
389        case PDFOBJ_STREAM: {
390                CPDF_Stream* p = (CPDF_Stream*)pObj;
391                buf << p->GetDict() << FX_BSTRC("stream\r\n");
392                CPDF_StreamAcc acc;
393                acc.LoadAllData(p, TRUE);
394                buf.AppendBlock(acc.GetData(), acc.GetSize());
395                buf << FX_BSTRC("\r\nendstream");
396                break;
397            }
398        default:
399            ASSERT(FALSE);
400            break;
401    }
402    return buf;
403}
404FX_FLOAT PDF_ClipFloat(FX_FLOAT f)
405{
406    if (f < 0) {
407        return 0;
408    }
409    if (f > 1.0f) {
410        return 1.0f;
411    }
412    return f;
413}
414static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num)
415{
416    CPDF_Array* pLimits = pNode->GetArray("Limits");
417    if (pLimits && (num < pLimits->GetInteger(0) || num > pLimits->GetInteger(1))) {
418        return NULL;
419    }
420    CPDF_Array* pNumbers = pNode->GetArray("Nums");
421    if (pNumbers) {
422        FX_DWORD dwCount = pNumbers->GetCount() / 2;
423        for (FX_DWORD i = 0; i < dwCount; i ++) {
424            int index = pNumbers->GetInteger(i * 2);
425            if (num == index) {
426                return pNumbers->GetElementValue(i * 2 + 1);
427            }
428            if (index > num) {
429                break;
430            }
431        }
432        return NULL;
433    }
434    CPDF_Array* pKids = pNode->GetArray("Kids");
435    if (pKids == NULL) {
436        return NULL;
437    }
438    for (FX_DWORD i = 0; i < pKids->GetCount(); i ++) {
439        CPDF_Dictionary* pKid = pKids->GetDict(i);
440        if (pKid == NULL) {
441            continue;
442        }
443        CPDF_Object* pFound = SearchNumberNode(pKid, num);
444        if (pFound) {
445            return pFound;
446        }
447    }
448    return NULL;
449}
450CPDF_Object* CPDF_NumberTree::LookupValue(int num)
451{
452    return SearchNumberNode(m_pRoot, num);
453}
454