fpdf_page_parser_old.cpp revision ee451cb395940862dad63c85adfe8f2fd55e864c
1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "../../../include/fpdfapi/fpdf_page.h"
8#include "../../../include/fpdfapi/fpdf_module.h"
9#include "../../../include/fxcodec/fx_codec.h"
10#include "pageint.h"
11#include <limits.h>
12extern const FX_LPCSTR _PDF_OpCharType =
13    "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
14    "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII"
15    "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII"
16    "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII"
17    "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
18    "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
19    "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
20    "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII";
21FX_BOOL _PDF_HasInvalidOpChar(FX_LPCSTR op)
22{
23    if(!op) {
24        return FALSE;
25    }
26    FX_BYTE ch;
27    while((ch = *op++)) {
28        if(_PDF_OpCharType[ch] == 'I') {
29            return TRUE;
30        }
31    }
32    return FALSE;
33}
34FX_DWORD CPDF_StreamContentParser::Parse(FX_LPCBYTE pData, FX_DWORD dwSize, FX_DWORD max_cost)
35{
36    if (m_Level > _FPDF_MAX_FORM_LEVEL_) {
37        return dwSize;
38    }
39    FX_DWORD InitObjCount = m_pObjectList->CountObjects();
40    CPDF_StreamParser syntax(pData, dwSize);
41    m_pSyntax = &syntax;
42    m_CompatCount = 0;
43    while (1) {
44        FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount;
45        if (max_cost && cost >= max_cost) {
46            break;
47        }
48        switch (syntax.ParseNextElement()) {
49            case CPDF_StreamParser::EndOfData:
50                return m_pSyntax->GetPos();
51            case CPDF_StreamParser::Keyword:
52                if(!OnOperator((char*)syntax.GetWordBuf()) && _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) {
53                    m_bAbort = TRUE;
54                }
55                if (m_bAbort) {
56                    return m_pSyntax->GetPos();
57                }
58                ClearAllParams();
59                break;
60            case CPDF_StreamParser::Number:
61                AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize());
62                break;
63            case CPDF_StreamParser::Name:
64                AddNameParam((FX_LPCSTR)syntax.GetWordBuf() + 1, syntax.GetWordSize() - 1);
65                break;
66            default:
67                AddObjectParam(syntax.GetObject());
68        }
69    }
70    return m_pSyntax->GetPos();
71}
72void _PDF_ReplaceAbbr(CPDF_Object* pObj);
73void CPDF_StreamContentParser::Handle_BeginImage()
74{
75    FX_FILESIZE savePos = m_pSyntax->GetPos();
76    CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
77    while (1) {
78        CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
79        if (type == CPDF_StreamParser::Keyword) {
80            CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize());
81            if (bsKeyword != FX_BSTRC("ID")) {
82                m_pSyntax->SetPos(savePos);
83                pDict->Release();
84                return;
85            }
86        }
87        if (type != CPDF_StreamParser::Name) {
88            break;
89        }
90        CFX_ByteString key((FX_LPCSTR)m_pSyntax->GetWordBuf() + 1, m_pSyntax->GetWordSize() - 1);
91        CPDF_Object* pObj = m_pSyntax->ReadNextObject();
92        if (!key.IsEmpty()) {
93            pDict->SetAt(key, pObj, m_pDocument);
94        } else {
95            pObj->Release();
96        }
97    }
98    _PDF_ReplaceAbbr(pDict);
99    CPDF_Object* pCSObj = NULL;
100    if (pDict->KeyExist(FX_BSTRC("ColorSpace"))) {
101        pCSObj = pDict->GetElementValue(FX_BSTRC("ColorSpace"));
102        if (pCSObj->GetType() == PDFOBJ_NAME) {
103            CFX_ByteString name = pCSObj->GetString();
104            if (name != FX_BSTRC("DeviceRGB") && name != FX_BSTRC("DeviceGray") && name != FX_BSTRC("DeviceCMYK")) {
105                pCSObj = FindResourceObj(FX_BSTRC("ColorSpace"), name);
106                if (pCSObj && !pCSObj->GetObjNum()) {
107                    pCSObj = pCSObj->Clone();
108                    pDict->SetAt(FX_BSTRC("ColorSpace"), pCSObj, m_pDocument);
109                }
110            }
111        }
112    }
113    CPDF_Stream* pStream = m_pSyntax->ReadInlineStream(m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage);
114    while (1) {
115        CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
116        if (type == CPDF_StreamParser::EndOfData) {
117            break;
118        }
119        if (type != CPDF_StreamParser::Keyword) {
120            continue;
121        }
122        if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' &&
123                m_pSyntax->GetWordBuf()[1] == 'I') {
124            break;
125        }
126    }
127    if (m_Options.m_bTextOnly) {
128        if (pStream) {
129            pStream->Release();
130        } else {
131            pDict->Release();
132        }
133        return;
134    }
135    pDict->SetAtName(FX_BSTRC("Subtype"), FX_BSTRC("Image"));
136    CPDF_ImageObject *pImgObj = AddImage(pStream, NULL, TRUE);
137    if (!pImgObj) {
138        if (pStream) {
139            pStream->Release();
140        } else {
141            pDict->Release();
142        }
143    }
144}
145void CPDF_StreamContentParser::ParsePathObject()
146{
147    FX_FLOAT params[6] = {0};
148    int nParams = 0;
149    int last_pos = m_pSyntax->GetPos();
150    while (1) {
151        CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
152        FX_BOOL bProcessed = TRUE;
153        switch (type) {
154            case CPDF_StreamParser::EndOfData:
155                return;
156            case CPDF_StreamParser::Keyword: {
157                    int len = m_pSyntax->GetWordSize();
158                    if (len == 1) {
159                        switch (m_pSyntax->GetWordBuf()[0]) {
160                            case 'm':
161                                AddPathPoint(params[0], params[1], FXPT_MOVETO);
162                                nParams = 0;
163                                break;
164                            case 'l':
165                                AddPathPoint(params[0], params[1], FXPT_LINETO);
166                                nParams = 0;
167                                break;
168                            case 'c':
169                                AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
170                                AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
171                                AddPathPoint(params[4], params[5], FXPT_BEZIERTO);
172                                nParams = 0;
173                                break;
174                            case 'v':
175                                AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO);
176                                AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
177                                AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
178                                nParams = 0;
179                                break;
180                            case 'y':
181                                AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
182                                AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
183                                AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
184                                nParams = 0;
185                                break;
186                            case 'h':
187                                Handle_ClosePath();
188                                nParams = 0;
189                                break;
190                            default:
191                                bProcessed = FALSE;
192                                break;
193                        }
194                    } else if (len == 2) {
195                        if (m_pSyntax->GetWordBuf()[0] == 'r' && m_pSyntax->GetWordBuf()[1] == 'e') {
196                            AddPathRect(params[0], params[1], params[2], params[3]);
197                            nParams = 0;
198                        } else {
199                            bProcessed = FALSE;
200                        }
201                    } else {
202                        bProcessed = FALSE;
203                    }
204                    if (bProcessed) {
205                        last_pos = m_pSyntax->GetPos();
206                    }
207                    break;
208                }
209            case CPDF_StreamParser::Number: {
210                    if (nParams == 6) {
211                        break;
212                    }
213                    FX_BOOL bInteger;
214                    int value;
215                    FX_atonum(CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()), bInteger, &value);
216                    params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value;
217                    break;
218                }
219            default:
220                bProcessed = FALSE;
221        }
222        if (!bProcessed) {
223            m_pSyntax->SetPos(last_pos);
224            return;
225        }
226    }
227}
228CPDF_StreamParser::CPDF_StreamParser(const FX_BYTE* pData, FX_DWORD dwSize)
229{
230    m_pBuf = pData;
231    m_Size = dwSize;
232    m_Pos = 0;
233    m_pLastObj = NULL;
234}
235CPDF_StreamParser::~CPDF_StreamParser()
236{
237    if (m_pLastObj) {
238        m_pLastObj->Release();
239    }
240}
241FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder, FX_LPBYTE& dest_buf, FX_DWORD& dest_size)
242{
243    if (pDecoder == NULL) {
244        return (FX_DWORD) - 1;
245    }
246    int ncomps = pDecoder->CountComps();
247    int bpc = pDecoder->GetBPC();
248    int width = pDecoder->GetWidth();
249    int height = pDecoder->GetHeight();
250    int pitch = (width * ncomps * bpc + 7) / 8;
251    if (height == 0 || pitch > (1 << 30) / height) {
252        delete pDecoder;
253        return -1;
254    }
255    dest_size = pitch * height;
256    dest_buf = FX_Alloc( FX_BYTE, dest_size);
257    for (int row = 0; row < height; row ++) {
258        FX_LPBYTE pLine = pDecoder->GetScanline(row);
259        if (pLine == NULL) {
260            break;
261        }
262        FXSYS_memcpy32(dest_buf + row * pitch, pLine, pitch);
263    }
264    FX_DWORD srcoff = pDecoder->GetSrcOffset();
265    delete pDecoder;
266    return srcoff;
267}
268ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(FX_LPCBYTE src_buf, FX_DWORD src_size, int width, int height,
269        const CPDF_Dictionary* pParams);
270FX_DWORD _A85Decode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
271FX_DWORD _HexDecode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
272FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW, const FX_BYTE* src_buf, FX_DWORD src_size, CPDF_Dictionary* pParams,
273                                  FX_DWORD estimated_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
274FX_DWORD PDF_DecodeInlineStream(const FX_BYTE* src_buf, FX_DWORD limit,
275                                int width, int height, CFX_ByteString& decoder,
276                                CPDF_Dictionary* pParam, FX_LPBYTE& dest_buf, FX_DWORD& dest_size)
277{
278    if (decoder == FX_BSTRC("CCITTFaxDecode") || decoder == FX_BSTRC("CCF")) {
279        ICodec_ScanlineDecoder* pDecoder = FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
280        return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
281    } else if (decoder == FX_BSTRC("ASCII85Decode") || decoder == FX_BSTRC("A85")) {
282        return _A85Decode(src_buf, limit, dest_buf, dest_size);
283    } else if (decoder == FX_BSTRC("ASCIIHexDecode") || decoder == FX_BSTRC("AHx")) {
284        return _HexDecode(src_buf, limit, dest_buf, dest_size);
285    } else if (decoder == FX_BSTRC("FlateDecode") || decoder == FX_BSTRC("Fl")) {
286        return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size, dest_buf, dest_size);
287    } else if (decoder == FX_BSTRC("LZWDecode") || decoder == FX_BSTRC("LZW")) {
288        return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf, dest_size);
289    } else if (decoder == FX_BSTRC("DCTDecode") || decoder == FX_BSTRC("DCT")) {
290        ICodec_ScanlineDecoder* pDecoder = CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
291                                               src_buf, limit, width, height, 0, pParam ? pParam->GetInteger(FX_BSTRC("ColorTransform"), 1) : 1);
292        return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
293    } else if (decoder == FX_BSTRC("RunLengthDecode") || decoder == FX_BSTRC("RL")) {
294        return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
295    }
296    dest_size = 0;
297    dest_buf = 0;
298    return (FX_DWORD) - 1;
299}
300extern const FX_LPCSTR _PDF_CharType;
301CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_Dictionary* pDict, CPDF_Object* pCSObj, FX_BOOL bDecode)
302{
303    if (m_Pos == m_Size) {
304        return NULL;
305    }
306    if (_PDF_CharType[m_pBuf[m_Pos]] == 'W') {
307        m_Pos ++;
308    }
309    CFX_ByteString Decoder;
310    CPDF_Dictionary* pParam = NULL;
311    CPDF_Object* pFilter = pDict->GetElementValue(FX_BSTRC("Filter"));
312    if (pFilter == NULL) {
313    } else if (pFilter->GetType() == PDFOBJ_ARRAY) {
314        Decoder = ((CPDF_Array*)pFilter)->GetString(0);
315        CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms"));
316        if (pParams) {
317            pParam = pParams->GetDict(0);
318        }
319    } else {
320        Decoder = pFilter->GetString();
321        pParam = pDict->GetDict(FX_BSTRC("DecodeParms"));
322    }
323    FX_DWORD width = pDict->GetInteger(FX_BSTRC("Width"));
324    FX_DWORD height = pDict->GetInteger(FX_BSTRC("Height"));
325    FX_DWORD OrigSize = 0;
326    if (pCSObj != NULL) {
327        FX_DWORD bpc = pDict->GetInteger(FX_BSTRC("BitsPerComponent"));
328        FX_DWORD nComponents = 1;
329        CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
330        if (pCS == NULL) {
331            nComponents = 3;
332        } else {
333            nComponents = pCS->CountComponents();
334            pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
335        }
336        FX_DWORD pitch = width;
337        if (bpc && pitch > INT_MAX / bpc) {
338            return NULL;
339        }
340        pitch *= bpc;
341        if (nComponents && pitch > INT_MAX / nComponents) {
342            return NULL;
343        }
344        pitch *= nComponents;
345        if (pitch > INT_MAX - 7) {
346            return NULL;
347        }
348        pitch += 7;
349        pitch /= 8;
350        OrigSize = pitch;
351    } else {
352        if (width > INT_MAX - 7) {
353            return NULL;
354        }
355        OrigSize = ((width + 7) / 8);
356    }
357    if (height && OrigSize > INT_MAX / height) {
358        return NULL;
359    }
360    OrigSize *= height;
361    FX_LPBYTE pData = NULL;
362    FX_DWORD dwStreamSize;
363    if (Decoder.IsEmpty()) {
364        if (OrigSize > m_Size - m_Pos) {
365            OrigSize = m_Size - m_Pos;
366        }
367        pData = FX_Alloc(FX_BYTE, OrigSize);
368        FXSYS_memcpy32(pData, m_pBuf + m_Pos, OrigSize);
369        dwStreamSize = OrigSize;
370        m_Pos += OrigSize;
371    } else {
372        FX_DWORD dwDestSize = OrigSize;
373        dwStreamSize = PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, Decoder, pParam,
374                                              pData, dwDestSize);
375        if ((int)dwStreamSize < 0) {
376            return NULL;
377        }
378        if (bDecode) {
379            m_Pos += dwStreamSize;
380            dwStreamSize = dwDestSize;
381            if (pFilter->GetType() == PDFOBJ_ARRAY) {
382                ((CPDF_Array*)pFilter)->RemoveAt(0);
383                CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms"));
384                if (pParams) {
385                    pParams->RemoveAt(0);
386                }
387            } else {
388                pDict->RemoveAt(FX_BSTRC("Filter"));
389                pDict->RemoveAt(FX_BSTRC("DecodeParms"));
390            }
391        } else {
392            if (pData) {
393                FX_Free(pData);
394            }
395            FX_DWORD dwSavePos = m_Pos;
396            m_Pos += dwStreamSize;
397            while (1) {
398                FX_DWORD dwPrevPos = m_Pos;
399                CPDF_StreamParser::SyntaxType type = ParseNextElement();
400                if (type == CPDF_StreamParser::EndOfData) {
401                    break;
402                }
403                if (type != CPDF_StreamParser::Keyword) {
404                    dwStreamSize += m_Pos - dwPrevPos;
405                    continue;
406                }
407                if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' &&
408                        GetWordBuf()[1] == 'I') {
409                    m_Pos = dwPrevPos;
410                    break;
411                }
412                dwStreamSize += m_Pos - dwPrevPos;
413            }
414            m_Pos = dwSavePos;
415            pData = FX_Alloc(FX_BYTE, dwStreamSize);
416            FXSYS_memcpy32(pData, m_pBuf + m_Pos, dwStreamSize);
417            m_Pos += dwStreamSize;
418        }
419    }
420    pDict->SetAtInteger(FX_BSTRC("Length"), (int)dwStreamSize);
421    return CPDF_Stream::Create(pData, dwStreamSize, pDict);
422}
423#define MAX_WORD_BUFFER 256
424#define MAX_STRING_LENGTH	32767
425#define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274)
426#define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e)
427#define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166)
428CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement()
429{
430    if (m_pLastObj) {
431        m_pLastObj->Release();
432        m_pLastObj = NULL;
433    }
434    m_WordSize = 0;
435    FX_BOOL bIsNumber = TRUE;
436    if (m_Pos >= m_Size) {
437        return EndOfData;
438    }
439    int ch = m_pBuf[m_Pos++];
440    int type = _PDF_CharType[ch];
441    while (1) {
442        while (type == 'W') {
443            if (m_Size <= m_Pos) {
444                return EndOfData;
445            }
446            ch = m_pBuf[m_Pos++];
447            type = _PDF_CharType[ch];
448        }
449        if (ch != '%') {
450            break;
451        }
452        while (1) {
453            if (m_Size <= m_Pos) {
454                return EndOfData;
455            }
456            ch = m_pBuf[m_Pos++];
457            if (ch == '\r' || ch == '\n') {
458                break;
459            }
460        }
461        type = _PDF_CharType[ch];
462    }
463    if (type == 'D' && ch != '/') {
464        m_Pos --;
465        m_pLastObj = ReadNextObject();
466        return Others;
467    }
468    while (1) {
469        if (m_WordSize < MAX_WORD_BUFFER) {
470            m_WordBuffer[m_WordSize++] = ch;
471        }
472        if (type != 'N') {
473            bIsNumber = FALSE;
474        }
475        if (m_Size <= m_Pos) {
476            break;
477        }
478        ch = m_pBuf[m_Pos++];
479        type = _PDF_CharType[ch];
480        if (type == 'D' || type == 'W') {
481            m_Pos --;
482            break;
483        }
484    }
485    m_WordBuffer[m_WordSize] = 0;
486    if (bIsNumber) {
487        return Number;
488    }
489    if (m_WordBuffer[0] == '/') {
490        return Name;
491    }
492    if (m_WordSize == 4) {
493        if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
494            m_pLastObj = CPDF_Boolean::Create(TRUE);
495            return Others;
496        }
497        if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
498            m_pLastObj = CPDF_Null::Create();
499            return Others;
500        }
501    } else if (m_WordSize == 5) {
502        if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
503            m_pLastObj = CPDF_Boolean::Create(FALSE);
504            return Others;
505        }
506    }
507    return Keyword;
508}
509void CPDF_StreamParser::SkipPathObject()
510{
511    FX_DWORD command_startpos = m_Pos;
512    if (m_Pos >= m_Size) {
513        return;
514    }
515    int ch = m_pBuf[m_Pos++];
516    int type = _PDF_CharType[ch];
517    while (1) {
518        while (type == 'W') {
519            if (m_Pos >= m_Size) {
520                return;
521            }
522            ch = m_pBuf[m_Pos++];
523            type = _PDF_CharType[ch];
524        }
525        if (type != 'N') {
526            m_Pos = command_startpos;
527            return;
528        }
529        while (1) {
530            while (type != 'W') {
531                if (m_Pos >= m_Size) {
532                    return;
533                }
534                ch = m_pBuf[m_Pos++];
535                type = _PDF_CharType[ch];
536            }
537            while (type == 'W') {
538                if (m_Pos >= m_Size) {
539                    return;
540                }
541                ch = m_pBuf[m_Pos++];
542                type = _PDF_CharType[ch];
543            }
544            if (type == 'N') {
545                continue;
546            }
547            FX_DWORD op_startpos = m_Pos - 1;
548            while (type != 'W' && type != 'D') {
549                if (m_Pos >= m_Size) {
550                    return;
551                }
552                ch = m_pBuf[m_Pos++];
553                type = _PDF_CharType[ch];
554            }
555            if (m_Pos - op_startpos == 2) {
556                int op = m_pBuf[op_startpos];
557                if (op == 'm' || op == 'l' || op == 'c' || op == 'v' || op == 'y') {
558                    command_startpos = m_Pos;
559                    break;
560                }
561            } else if (m_Pos - op_startpos == 3) {
562                if (m_pBuf[op_startpos] == 'r' && m_pBuf[op_startpos + 1] == 'e') {
563                    command_startpos = m_Pos;
564                    break;
565                }
566            }
567            m_Pos = command_startpos;
568            return;
569        }
570    }
571}
572CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray, FX_BOOL bInArray)
573{
574    FX_BOOL bIsNumber;
575    GetNextWord(bIsNumber);
576    if (m_WordSize == 0) {
577        return NULL;
578    }
579    if (bIsNumber) {
580        m_WordBuffer[m_WordSize] = 0;
581        return CPDF_Number::Create(CFX_ByteStringC(m_WordBuffer, m_WordSize));
582    }
583    int first_char = m_WordBuffer[0];
584    if (first_char == '/') {
585        return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
586    }
587    if (first_char == '(') {
588        return CPDF_String::Create(ReadString());
589    }
590    if (first_char == '<') {
591        if (m_WordSize == 1) {
592            return CPDF_String::Create(ReadHexString(), TRUE);
593        }
594        CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
595        while (1) {
596            GetNextWord(bIsNumber);
597            if (m_WordSize == 0) {
598                pDict->Release();
599                return NULL;
600            }
601            if (m_WordSize == 2 && m_WordBuffer[0] == '>') {
602                break;
603            }
604            if (m_WordBuffer[0] != '/') {
605                pDict->Release();
606                return NULL;
607            }
608            CFX_ByteString key = PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
609            CPDF_Object* pObj = ReadNextObject(TRUE);
610            if (pObj == NULL) {
611                if (pDict) {
612                    pDict->Release();
613                }
614                return NULL;
615            }
616            if (!key.IsEmpty()) {
617                pDict->SetAt(key, pObj);
618            } else {
619                pObj->Release();
620            }
621        }
622        return pDict;
623    }
624    if (first_char == '[') {
625        if (!bAllowNestedArray && bInArray) {
626            return NULL;
627        }
628        CPDF_Array* pArray = CPDF_Array::Create();
629        while (1) {
630            CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE);
631            if (pObj == NULL) {
632                if (m_WordSize == 0 || m_WordBuffer[0] == ']') {
633                    return pArray;
634                }
635                if (m_WordBuffer[0] == '[') {
636                    continue;
637                }
638            } else {
639                pArray->Add(pObj);
640            }
641        }
642    }
643    if (m_WordSize == 4) {
644        if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
645            return CPDF_Boolean::Create(TRUE);
646        }
647        if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
648            return CPDF_Null::Create();
649        }
650    } else if (m_WordSize == 5) {
651        if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
652            return CPDF_Boolean::Create(FALSE);
653        }
654    }
655    return NULL;
656}
657void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber)
658{
659    m_WordSize = 0;
660    bIsNumber = TRUE;
661    if (m_Size <= m_Pos) {
662        return;
663    }
664    int ch = m_pBuf[m_Pos++];
665    int type = _PDF_CharType[ch];
666    while (1) {
667        while (type == 'W') {
668            if (m_Size <= m_Pos) {
669                return;
670            }
671            ch = m_pBuf[m_Pos++];
672            type = _PDF_CharType[ch];
673        }
674        if (ch != '%') {
675            break;
676        }
677        while (1) {
678            if (m_Size <= m_Pos) {
679                return;
680            }
681            ch = m_pBuf[m_Pos++];
682            if (ch == '\r' || ch == '\n') {
683                break;
684            }
685        }
686        type = _PDF_CharType[ch];
687    }
688    if (type == 'D') {
689        bIsNumber = FALSE;
690        m_WordBuffer[m_WordSize++] = ch;
691        if (ch == '/') {
692            while (1) {
693                if (m_Size <= m_Pos) {
694                    return;
695                }
696                ch = m_pBuf[m_Pos++];
697                type = _PDF_CharType[ch];
698                if (type != 'R' && type != 'N') {
699                    m_Pos --;
700                    return;
701                }
702                if (m_WordSize < MAX_WORD_BUFFER) {
703                    m_WordBuffer[m_WordSize++] = ch;
704                }
705            }
706        } else if (ch == '<') {
707            if (m_Size <= m_Pos) {
708                return;
709            }
710            ch = m_pBuf[m_Pos++];
711            if (ch == '<') {
712                m_WordBuffer[m_WordSize++] = ch;
713            } else {
714                m_Pos --;
715            }
716        } else if (ch == '>') {
717            if (m_Size <= m_Pos) {
718                return;
719            }
720            ch = m_pBuf[m_Pos++];
721            if (ch == '>') {
722                m_WordBuffer[m_WordSize++] = ch;
723            } else {
724                m_Pos --;
725            }
726        }
727        return;
728    }
729    while (1) {
730        if (m_WordSize < MAX_WORD_BUFFER) {
731            m_WordBuffer[m_WordSize++] = ch;
732        }
733        if (type != 'N') {
734            bIsNumber = FALSE;
735        }
736        if (m_Size <= m_Pos) {
737            return;
738        }
739        ch = m_pBuf[m_Pos++];
740        type = _PDF_CharType[ch];
741        if (type == 'D' || type == 'W') {
742            m_Pos --;
743            break;
744        }
745    }
746}
747CFX_ByteString CPDF_StreamParser::ReadString()
748{
749    if (m_Size <= m_Pos) {
750        return CFX_ByteString();
751    }
752    int ch = m_pBuf[m_Pos++];
753    CFX_ByteTextBuf buf;
754    int parlevel = 0;
755    int status = 0, iEscCode = 0;
756    while (1) {
757        switch (status) {
758            case 0:
759                if (ch == ')') {
760                    if (parlevel == 0) {
761                        if (buf.GetLength() > MAX_STRING_LENGTH) {
762                            return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
763                        }
764                        return buf.GetByteString();
765                    }
766                    parlevel --;
767                    buf.AppendChar(')');
768                } else if (ch == '(') {
769                    parlevel ++;
770                    buf.AppendChar('(');
771                } else if (ch == '\\') {
772                    status = 1;
773                } else {
774                    buf.AppendChar((char)ch);
775                }
776                break;
777            case 1:
778                if (ch >= '0' && ch <= '7') {
779                    iEscCode = ch - '0';
780                    status = 2;
781                    break;
782                }
783                if (ch == 'n') {
784                    buf.AppendChar('\n');
785                } else if (ch == 'r') {
786                    buf.AppendChar('\r');
787                } else if (ch == 't') {
788                    buf.AppendChar('\t');
789                } else if (ch == 'b') {
790                    buf.AppendChar('\b');
791                } else if (ch == 'f') {
792                    buf.AppendChar('\f');
793                } else if (ch == '\r') {
794                    status = 4;
795                    break;
796                } else if (ch == '\n') {
797                } else {
798                    buf.AppendChar(ch);
799                }
800                status = 0;
801                break;
802            case 2:
803                if (ch >= '0' && ch <= '7') {
804                    iEscCode = iEscCode * 8 + ch - '0';
805                    status = 3;
806                } else {
807                    buf.AppendChar(iEscCode);
808                    status = 0;
809                    continue;
810                }
811                break;
812            case 3:
813                if (ch >= '0' && ch <= '7') {
814                    iEscCode = iEscCode * 8 + ch - '0';
815                    buf.AppendChar(iEscCode);
816                    status = 0;
817                } else {
818                    buf.AppendChar(iEscCode);
819                    status = 0;
820                    continue;
821                }
822                break;
823            case 4:
824                status = 0;
825                if (ch != '\n') {
826                    continue;
827                }
828                break;
829        }
830        if (m_Size <= m_Pos) {
831            break;
832        }
833        ch = m_pBuf[m_Pos++];
834    }
835    if (m_Size > m_Pos) {
836        ch = m_pBuf[m_Pos++];
837    }
838    if (buf.GetLength() > MAX_STRING_LENGTH) {
839        return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
840    }
841    return buf.GetByteString();
842}
843CFX_ByteString CPDF_StreamParser::ReadHexString()
844{
845    if (m_Size <= m_Pos) {
846        return CFX_ByteString();
847    }
848    int ch = m_pBuf[m_Pos++];
849    CFX_ByteTextBuf buf;
850    FX_BOOL bFirst = TRUE;
851    int code = 0;
852    while (1) {
853        if (ch == '>') {
854            break;
855        }
856        if (ch >= '0' && ch <= '9') {
857            if (bFirst) {
858                code = (ch - '0') * 16;
859            } else {
860                code += ch - '0';
861                buf.AppendChar((char)code);
862            }
863            bFirst = !bFirst;
864        } else if (ch >= 'A' && ch <= 'F') {
865            if (bFirst) {
866                code = (ch - 'A' + 10) * 16;
867            } else {
868                code += ch - 'A' + 10;
869                buf.AppendChar((char)code);
870            }
871            bFirst = !bFirst;
872        } else if (ch >= 'a' && ch <= 'f') {
873            if (bFirst) {
874                code = (ch - 'a' + 10) * 16;
875            } else {
876                code += ch - 'a' + 10;
877                buf.AppendChar((char)code);
878            }
879            bFirst = !bFirst;
880        }
881        if (m_Size <= m_Pos) {
882            break;
883        }
884        ch = m_pBuf[m_Pos++];
885    }
886    if (!bFirst) {
887        buf.AppendChar((char)code);
888    }
889    if (buf.GetLength() > MAX_STRING_LENGTH) {
890        return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
891    }
892    return buf.GetByteString();
893}
// Internal stages of CPDF_ContentParser::Continue(), stored in m_InternalStage.
// GETCONTENT: load the content stream(s) and expose them as one data buffer.
#define PAGEPARSE_STAGE_GETCONTENT		1
// PARSE: feed the data buffer to the CPDF_StreamContentParser in steps.
#define PAGEPARSE_STAGE_PARSE			2
// CHECKCLIP: final pass that copies Type3 metrics and drops rectangular clip
// paths that already contain their object; after it the status becomes Done.
#define PAGEPARSE_STAGE_CHECKCLIP		3
897CPDF_ContentParser::CPDF_ContentParser()
898{
899    m_pParser = NULL;
900    m_pStreamArray = NULL;
901    m_pSingleStream = NULL;
902    m_pData = NULL;
903    m_Status = Ready;
904    m_pType3Char = NULL;
905}
906CPDF_ContentParser::~CPDF_ContentParser()
907{
908    Clear();
909}
910void CPDF_ContentParser::Clear()
911{
912    if (m_pParser) {
913        delete m_pParser;
914    }
915    if (m_pSingleStream) {
916        delete m_pSingleStream;
917    }
918    if (m_pStreamArray) {
919        for (FX_DWORD i = 0; i < m_nStreams; i ++)
920            if (m_pStreamArray[i]) {
921                delete m_pStreamArray[i];
922            }
923        FX_Free(m_pStreamArray);
924    }
925    if (m_pData && m_pSingleStream == NULL) {
926        FX_Free((void*)m_pData);
927    }
928    m_pParser = NULL;
929    m_pStreamArray = NULL;
930    m_pSingleStream = NULL;
931    m_pData = NULL;
932    m_Status = Ready;
933}
934void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions)
935{
936    if (m_Status != Ready || pPage == NULL || pPage->m_pDocument == NULL || pPage->m_pFormDict == NULL) {
937        m_Status = Done;
938        return;
939    }
940    m_pObjects = pPage;
941    m_bForm = FALSE;
942    if (pOptions) {
943        m_Options = *pOptions;
944    }
945    m_Status = ToBeContinued;
946    m_InternalStage = PAGEPARSE_STAGE_GETCONTENT;
947    m_CurrentOffset = 0;
948    CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue(FX_BSTRC("Contents"));
949    if (pContent == NULL) {
950        m_Status = Done;
951        return;
952    }
953    if (pContent->GetType() == PDFOBJ_STREAM) {
954        m_nStreams = 0;
955        m_pSingleStream = FX_NEW CPDF_StreamAcc;
956        m_pSingleStream->LoadAllData((CPDF_Stream*)pContent, FALSE);
957    } else if (pContent->GetType() == PDFOBJ_ARRAY) {
958        CPDF_Array* pArray = (CPDF_Array*)pContent;
959        m_nStreams = pArray->GetCount();
960        if (m_nStreams == 0) {
961            m_Status = Done;
962            return;
963        }
964        m_pStreamArray = FX_Alloc(CPDF_StreamAcc*, m_nStreams);
965        FXSYS_memset32(m_pStreamArray, 0, sizeof(CPDF_StreamAcc*) * m_nStreams);
966    } else {
967        m_Status = Done;
968        return;
969    }
970}
971void CPDF_ContentParser::Start(CPDF_Form* pForm, CPDF_AllStates* pGraphicStates,
972                               CFX_AffineMatrix* pParentMatrix, CPDF_Type3Char* pType3Char, CPDF_ParseOptions* pOptions, int level)
973{
974    m_pType3Char = pType3Char;
975    m_pObjects = pForm;
976    m_bForm = TRUE;
977    CFX_AffineMatrix form_matrix = pForm->m_pFormDict->GetMatrix(FX_BSTRC("Matrix"));
978    if (pGraphicStates) {
979        form_matrix.Concat(pGraphicStates->m_CTM);
980    }
981    CPDF_Array* pBBox = pForm->m_pFormDict->GetArray(FX_BSTRC("BBox"));
982    CFX_FloatRect form_bbox;
983    CPDF_Path ClipPath;
984    if (pBBox) {
985        form_bbox = pBBox->GetRect();
986        ClipPath.New();
987        ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, form_bbox.top);
988        ClipPath.Transform(&form_matrix);
989        if (pParentMatrix) {
990            ClipPath.Transform(pParentMatrix);
991        }
992        form_bbox.Transform(&form_matrix);
993        if (pParentMatrix) {
994            form_bbox.Transform(pParentMatrix);
995        }
996    }
997    CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict(FX_BSTRC("Resources"));
998    m_pParser = FX_NEW CPDF_StreamContentParser;
999    m_pParser->Initialize();
1000    m_pParser->PrepareParse(pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, pParentMatrix, pForm,
1001                            pResources, &form_bbox, pOptions, pGraphicStates, level);
1002    m_pParser->m_pCurStates->m_CTM = form_matrix;
1003    m_pParser->m_pCurStates->m_ParentMatrix = form_matrix;
1004    if (ClipPath.NotNull()) {
1005        m_pParser->m_pCurStates->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, TRUE);
1006    }
1007    if (pForm->m_Transparency & PDFTRANS_GROUP) {
1008        CPDF_GeneralStateData* pData = m_pParser->m_pCurStates->m_GeneralState.GetModify();
1009        pData->m_BlendType = FXDIB_BLEND_NORMAL;
1010        pData->m_StrokeAlpha = 1.0f;
1011        pData->m_FillAlpha = 1.0f;
1012        pData->m_pSoftMask = NULL;
1013    }
1014    m_nStreams = 0;
1015    m_pSingleStream = FX_NEW CPDF_StreamAcc;
1016    if (pForm->m_pDocument) {
1017        m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
1018    } else {
1019        m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
1020    }
1021    m_pData = (FX_LPBYTE)m_pSingleStream->GetData();
1022    m_Size = m_pSingleStream->GetSize();
1023    m_Status = ToBeContinued;
1024    m_InternalStage = PAGEPARSE_STAGE_PARSE;
1025    m_CurrentOffset = 0;
1026}
1027void CPDF_ContentParser::Continue(IFX_Pause* pPause)
1028{
1029    int steps = 0;
1030    while (m_Status == ToBeContinued) {
1031        if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) {
1032            if (m_CurrentOffset == m_nStreams) {
1033                if (m_pStreamArray) {
1034                    m_Size = 0;
1035                    FX_DWORD i;
1036                    for (i = 0; i < m_nStreams; i ++) {
1037                        FX_DWORD size = m_pStreamArray[i]->GetSize();
1038                        if (m_Size + size + 1 <= m_Size) {
1039							m_Status = Done;
1040							return;
1041                        }
1042                        m_Size += size + 1;
1043                    }
1044                    m_pData = FX_Alloc(FX_BYTE, m_Size);
1045                    if (!m_pData) {
1046                        m_Status = Done;
1047                        return;
1048                    }
1049                    FX_DWORD pos = 0;
1050                    for (i = 0; i < m_nStreams; i ++) {
1051                        FXSYS_memcpy32(m_pData + pos, m_pStreamArray[i]->GetData(), m_pStreamArray[i]->GetSize());
1052                        pos += m_pStreamArray[i]->GetSize() + 1;
1053                        m_pData[pos - 1] = ' ';
1054                        delete m_pStreamArray[i];
1055                    }
1056                    FX_Free(m_pStreamArray);
1057                    m_pStreamArray = NULL;
1058                } else {
1059                    m_pData = (FX_LPBYTE)m_pSingleStream->GetData();
1060                    m_Size = m_pSingleStream->GetSize();
1061                }
1062                m_InternalStage = PAGEPARSE_STAGE_PARSE;
1063                m_CurrentOffset = 0;
1064            } else {
1065                CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray(FX_BSTRC("Contents"));
1066                m_pStreamArray[m_CurrentOffset] = FX_NEW CPDF_StreamAcc;
1067                CPDF_Stream* pStreamObj = (CPDF_Stream*)pContent->GetElementValue(m_CurrentOffset);
1068                m_pStreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE);
1069                m_CurrentOffset ++;
1070            }
1071        }
1072        if (m_InternalStage == PAGEPARSE_STAGE_PARSE) {
1073            if (m_pParser == NULL) {
1074                m_pParser = FX_NEW CPDF_StreamContentParser;
1075                m_pParser->Initialize();
1076                m_pParser->PrepareParse(m_pObjects->m_pDocument, m_pObjects->m_pPageResources, NULL, NULL, m_pObjects,
1077                                        m_pObjects->m_pResources, &m_pObjects->m_BBox, &m_Options, NULL, 0);
1078                m_pParser->m_pCurStates->m_ColorState.GetModify()->Default();
1079            }
1080            if (m_CurrentOffset >= m_Size) {
1081                m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
1082            } else {
1083                m_CurrentOffset += m_pParser->Parse(m_pData + m_CurrentOffset, m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
1084                if (m_pParser->m_bAbort) {
1085                    m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
1086                    continue;
1087                }
1088            }
1089        }
1090        if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
1091            if (m_pType3Char) {
1092                m_pType3Char->m_bColored = m_pParser->m_bColored;
1093                m_pType3Char->m_Width = FXSYS_round(m_pParser->m_Type3Data[0] * 1000);
1094                m_pType3Char->m_BBox.left = FXSYS_round(m_pParser->m_Type3Data[2] * 1000);
1095                m_pType3Char->m_BBox.bottom = FXSYS_round(m_pParser->m_Type3Data[3] * 1000);
1096                m_pType3Char->m_BBox.right = FXSYS_round(m_pParser->m_Type3Data[4] * 1000);
1097                m_pType3Char->m_BBox.top = FXSYS_round(m_pParser->m_Type3Data[5] * 1000);
1098            }
1099            FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition();
1100            while (pos) {
1101                CPDF_PageObject* pObj = (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos);
1102                if (pObj->m_ClipPath.IsNull()) {
1103                    continue;
1104                }
1105                if (pObj->m_ClipPath.GetPathCount() != 1) {
1106                    continue;
1107                }
1108                if (pObj->m_ClipPath.GetTextCount()) {
1109                    continue;
1110                }
1111                CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
1112                if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) {
1113                    continue;
1114                }
1115                CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0),
1116                                       ClipPath.GetPointX(2), ClipPath.GetPointY(2));
1117                CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top);
1118                if (old_rect.Contains(obj_rect)) {
1119                    pObj->m_ClipPath.SetNull();
1120                }
1121            }
1122            m_Status = Done;
1123            return;
1124        }
1125        steps ++;
1126        if (pPause && pPause->NeedToPauseNow()) {
1127            break;
1128        }
1129    }
1130}
1131int CPDF_ContentParser::EstimateProgress()
1132{
1133    if (m_Status == Ready) {
1134        return 0;
1135    }
1136    if (m_Status == Done) {
1137        return 100;
1138    }
1139    if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) {
1140        return 10;
1141    }
1142    if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
1143        return 90;
1144    }
1145    return 10 + 80 * m_CurrentOffset / m_Size;
1146}
1147