// SkPdfNativeTokenizer.h revision 78b38b130deb8bcfa41611039875ce0162542ac1
#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_

#include "SkTDArray.h"
#include "SkTDict.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>

// Forward declarations. Where this header refers to these types at all, it does so
// only through pointers, so their full definitions are not needed here.
class SkPdfMapper;
class SkPdfDictionary;
class SkPdfImageDictionary;

// White Spaces
#define kNUL_PdfWhiteSpace '\x00'
#define kHT_PdfWhiteSpace  '\x09'
#define kLF_PdfWhiteSpace  '\x0A'
#define kFF_PdfWhiteSpace  '\x0C'
#define kCR_PdfWhiteSpace  '\x0D'
#define kSP_PdfWhiteSpace  '\x20'

// PdfDelimiters
#define kOpenedRoundBracket_PdfDelimiter        '('
#define kClosedRoundBracket_PdfDelimiter        ')'
#define kOpenedInequityBracket_PdfDelimiter     '<'
#define kClosedInequityBracket_PdfDelimiter     '>'
#define kOpenedSquareBracket_PdfDelimiter       '['
#define kClosedSquareBracket_PdfDelimiter       ']'
#define kOpenedCurlyBracket_PdfDelimiter        '{'
#define kClosedCurlyBracket_PdfDelimiter        '}'
#define kNamed_PdfDelimiter                     '/'
#define kComment_PdfDelimiter                   '%'

#define kEscape_PdfSpecial                      '\\'
#define kBackspace_PdfSpecial                   '\x08'

// Character-class predicates.
//
// These are inline functions rather than function-like macros so that the argument is
// evaluated exactly once; an expression with side effects such as isPdfWhiteSpace(*p++)
// is therefore safe. Call syntax is the same as for the former macros.
//
// TODO(edisonn): what is the fastest way for a given compiler/machine to evaluate these
// expressions? We should evaluate all options; it might even differ from one machine to another:
// 1) expand the expression, let the compiler optimize it
// 2) binary search
// 3) linear search in an array
// 4) lookup vector (e.g. T type[256] .. return type[ch] ...)
// 5) manually build the expression with the least number of operators, e.g. for consecutive
//    chars, we can use a binary equal ignoring the last bit

// True when ch is one of the six white-space characters defined above (NUL, HT, LF, FF, CR, SP).
static inline bool isPdfWhiteSpace(int ch) {
    switch (ch) {
        case kNUL_PdfWhiteSpace:
        case kHT_PdfWhiteSpace:
        case kLF_PdfWhiteSpace:
        case kFF_PdfWhiteSpace:
        case kCR_PdfWhiteSpace:
        case kSP_PdfWhiteSpace:
            return true;
        default:
            return false;
    }
}

// True when ch is a line feed or a carriage return.
static inline bool isPdfEOL(int ch) {
    return ch == kLF_PdfWhiteSpace || ch == kCR_PdfWhiteSpace;
}

// True when ch is one of the ten delimiter characters defined above.
// The escape and backspace specials are not delimiters.
static inline bool isPdfDelimiter(int ch) {
    switch (ch) {
        case kOpenedRoundBracket_PdfDelimiter:
        case kClosedRoundBracket_PdfDelimiter:
        case kOpenedInequityBracket_PdfDelimiter:
        case kClosedInequityBracket_PdfDelimiter:
        case kOpenedSquareBracket_PdfDelimiter:
        case kClosedSquareBracket_PdfDelimiter:
        case kOpenedCurlyBracket_PdfDelimiter:
        case kClosedCurlyBracket_PdfDelimiter:
        case kNamed_PdfDelimiter:
        case kComment_PdfDelimiter:
            return true;
        default:
            return false;
    }
}

// True when ch is either a white-space character or a delimiter.
static inline bool isPdfWhiteSpaceOrPdfDelimiter(int ch) {
    return isPdfWhiteSpace(ch) || isPdfDelimiter(ch);
}

// True when ch is an ASCII decimal digit '0'..'9'.
static inline bool isPdfDigit(int ch) {
    return ch >= '0' && ch <= '9';
}

// True when ch is a decimal digit or a sign character ('+' or '-').
// Note that '.' is not accepted here.
static inline bool isPdfNumeric(int ch) {
    return isPdfDigit(ch) || ch == '+' || ch == '-';
}

// Byte-buffer scanning helpers; declared here, defined elsewhere.
// Each receives a pointer into a byte buffer plus a byte count, and returns a pointer.
// NOTE(review): judging by the names, each presumably returns the position just past the
// skipped white space / token / comment, and len presumably bounds the scan; confirm
// against the definitions.
unsigned char* skipPdfWhiteSpaces(unsigned char* buffer, size_t len);
unsigned char* endOfPdfToken(unsigned char* start, size_t len);
unsigned char* skipPdfComment(unsigned char* start, size_t len);

// TODO(edisonn): typedef real and integer types? It might make the code less readable...
//typedef double SkPdfReal;
//typedef int64_t SkPdfInteger;

73571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// an allocator only allocates memory, and it deletes it all when the allocator is destroyed
74571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// this would allow us not to do any garbage collection while we parse or draw a pdf, and defere it
75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// while the user is looking at the image
76571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfObject;
78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator {
80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define BUFFER_SIZE 1024
81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkTDArray<SkPdfObject*> fHistory;
82571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkTDArray<void*> fHandles;
83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfObject* fCurrent;
84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    int fCurrentUsed;
85571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
86571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfObject* allocBlock();
87a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com    size_t fSizeInBytes;
88571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
893aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic:
90571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator() {
91a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        fSizeInBytes = sizeof(*this);
92571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fCurrent = allocBlock();
93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fCurrentUsed = 0;
94571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
96571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    ~SkPdfAllocator();
97571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfObject* allocObject();
99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    // TODO(edisonn): free this memory in destructor, track the usage?
101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void* alloc(size_t bytes) {
102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        void* data = malloc(bytes);
103571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fHandles.push(data);
104a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        fSizeInBytes += bytes;
105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        return data;
106571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
107a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com
1087b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com    size_t bytesUsed() const {
109a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        return fSizeInBytes;
110a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com    }
111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com};
112571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Forward declarations repeated here so this declaration does not depend on what precedes it.
class SkPdfObject;
class SkPdfAllocator;
class SkNativeParsedPDF;

// Declared here, defined elsewhere. Takes the [start, end) byte range, an SkPdfObject to
// work on, an allocator and the owning document, and returns a pointer into the buffer.
// NOTE(review): presumably parses one object starting at `start` into `token` and returns
// the position just past it; confirm against the definition.
unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token,
                          SkPdfAllocator* allocator, SkNativeParsedPDF* doc);

// Repeated forward declaration so this block does not depend on what precedes it.
class SkPdfObject;

// Discriminates what a PdfToken carries.
enum SkPdfTokenType {
    kKeyword_TokenType,
    kObject_TokenType
};

// One token produced by the tokenizer.
// NOTE(review): presumably fKeyword/fKeywordLength are meaningful when fType is
// kKeyword_TokenType and fObject when it is kObject_TokenType; confirm in the tokenizer.
struct PdfToken {
    const char*      fKeyword;
    size_t           fKeywordLength;
    SkPdfObject*     fObject;
    SkPdfTokenType   fType;

    // Every member gets a defined value. fType defaults to kKeyword_TokenType (the zero
    // enumerator) so that reading it from a default-constructed token is well defined.
    PdfToken()
        : fKeyword(NULL)
        , fKeywordLength(0)
        , fObject(NULL)
        , fType(kKeyword_TokenType) {}
};

130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer {
131571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic:
132951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.com    SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc);
133951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.com    SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc);
134571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1353aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com    virtual ~SkPdfNativeTokenizer();
136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
137571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool readToken(PdfToken* token);
138571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool readTokenCore(PdfToken* token);
139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void PutBack(PdfToken token);
14078b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com    SkPdfImageDictionary* readInlineImage();
141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate:
143951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.com    SkNativeParsedPDF* fDoc;
144571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    const SkPdfMapper* fMapper;
145571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator* fAllocator;
146571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
147571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    unsigned char* fUncompressedStreamStart;
148571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    unsigned char* fUncompressedStream;
149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    unsigned char* fUncompressedStreamEnd;
150571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
151571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool fEmpty;
152571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool fHasPutBack;
153571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    PdfToken fPutBack;
1543aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com};
1553aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
#endif  // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_