SkPdfNativeTokenizer.h revision 3aa355527a3b91d3e12b8bee49e5637d00a736ca
13aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
23aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
33aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
4571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDArray.h"
5571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDict.h"
6571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <math.h>
7571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <string.h>
8571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Forward declarations of PDF types that are defined elsewhere. This header refers to them
// through pointers only (if at all), so the full definitions are not needed here.
class SkPdfDictionary;
class SkPdfImageDictionary;
class SkPdfMapper;
12571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// White Spaces
#define kNUL_PdfWhiteSpace '\x00'
#define kHT_PdfWhiteSpace  '\x09'
#define kLF_PdfWhiteSpace  '\x0A'
#define kFF_PdfWhiteSpace  '\x0C'
#define kCR_PdfWhiteSpace  '\x0D'
#define kSP_PdfWhiteSpace  '\x20'

// PdfDelimiters
#define kOpenedRoundBracket_PdfDelimiter        '('
#define kClosedRoundBracket_PdfDelimiter        ')'
#define kOpenedInequityBracket_PdfDelimiter     '<'
#define kClosedInequityBracket_PdfDelimiter     '>'
#define kOpenedSquareBracket_PdfDelimiter       '['
#define kClosedSquareBracket_PdfDelimiter       ']'
#define kOpenedCurlyBracket_PdfDelimiter        '{'
#define kClosedCurlyBracket_PdfDelimiter        '}'
#define kNamed_PdfDelimiter                     '/'
#define kComment_PdfDelimiter                   '%'

#define kEscape_PdfSpecial                      '\\'
#define kBackspace_PdfSpecial                   '\x08'

// Character classification helpers.
//
// These are inline functions rather than function-like macros so that the argument is evaluated
// exactly once: an expression with side effects, such as isPdfWhiteSpace(*p++), advances p a
// single time. The names and call syntax are the same as the macros they replace.
//
// TODO(edisonn): what is the fastest way for the compiler/machine to evaluate these expressions?
// We should evaluate all options; the answer might even differ from one machine to another.
// 1) expand the expression, let the compiler optimize it
// 2) binary search
// 3) linear search in an array
// 4) lookup vector (e.g. T type[256] .. return type[ch] ...)
// 5) manually build the expression with the least number of operators, e.g. for consecutive
//    chars, we can use a binary equal ignoring the last bit

// Returns true when ch is one of the six white-space characters defined above.
static inline bool isPdfWhiteSpace(int ch) {
    switch (ch) {
        case kNUL_PdfWhiteSpace:
        case kHT_PdfWhiteSpace:
        case kLF_PdfWhiteSpace:
        case kFF_PdfWhiteSpace:
        case kCR_PdfWhiteSpace:
        case kSP_PdfWhiteSpace:
            return true;
        default:
            return false;
    }
}

// Returns true when ch is a line feed or a carriage return.
static inline bool isPdfEOL(int ch) {
    return ch == kLF_PdfWhiteSpace || ch == kCR_PdfWhiteSpace;
}

// Returns true when ch is one of the ten delimiter characters defined above.
static inline bool isPdfDelimiter(int ch) {
    switch (ch) {
        case kOpenedRoundBracket_PdfDelimiter:
        case kClosedRoundBracket_PdfDelimiter:
        case kOpenedInequityBracket_PdfDelimiter:
        case kClosedInequityBracket_PdfDelimiter:
        case kOpenedSquareBracket_PdfDelimiter:
        case kClosedSquareBracket_PdfDelimiter:
        case kOpenedCurlyBracket_PdfDelimiter:
        case kClosedCurlyBracket_PdfDelimiter:
        case kNamed_PdfDelimiter:
        case kComment_PdfDelimiter:
            return true;
        default:
            return false;
    }
}

// Returns true when ch is either a white-space character or a delimiter.
static inline bool isPdfWhiteSpaceOrPdfDelimiter(int ch) {
    return isPdfWhiteSpace(ch) || isPdfDelimiter(ch);
}

// Returns true when ch is an ASCII decimal digit.
static inline bool isPdfDigit(int ch) {
    return ch >= '0' && ch <= '9';
}

// Returns true when ch is a digit, a sign ('+' or '-') or a decimal point.
static inline bool isPdfNumeric(int ch) {
    return isPdfDigit(ch) || ch == '+' || ch == '-' || ch == '.';
}
64571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Low-level scanning helpers; both are only declared here and are implemented elsewhere.
// Each takes a byte range whose upper bound is |end| and returns a pointer into that data.
// NOTE(review): presumably skipPdfWhiteSpaces() returns the first non-white-space position and
// endOfPdfToken() the position just past the token starting at |start|, with |level| being a
// nesting depth -- confirm against the implementation.
const unsigned char* skipPdfWhiteSpaces(int level,
                                        const unsigned char* buffer,
                                        const unsigned char* end);
const unsigned char* endOfPdfToken(int level,
                                   const unsigned char* start,
                                   const unsigned char* end);
67571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// TODO(edisonn): typedef real and integer types? It might make the code less readable...
//typedef double SkPdfReal;
//typedef int64_t SkPdfInteger;
71571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
72571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// an allocator only allocates memory, and it deletes it all when the allocator is destroyed
73571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// this would allow us not to do any garbage collection while we parse or draw a pdf, and defere it
74571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// while the user is looking at the image
75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
763aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeObject;
77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator {
79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define BUFFER_SIZE 1024
803aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkTDArray<SkPdfNativeObject*> fHistory;
81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkTDArray<void*> fHandles;
823aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* fCurrent;
83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    int fCurrentUsed;
84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
853aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* allocBlock();
86a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com    size_t fSizeInBytes;
87571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
883aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic:
89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator() {
90a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        fSizeInBytes = sizeof(*this);
91571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fCurrent = allocBlock();
92571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fCurrentUsed = 0;
93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
94571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    ~SkPdfAllocator();
96571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
973aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* allocObject();
98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    // TODO(edisonn): free this memory in destructor, track the usage?
100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void* alloc(size_t bytes) {
101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        void* data = malloc(bytes);
102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fHandles.push(data);
103a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        fSizeInBytes += bytes;
104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        return data;
105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
106a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com
1077b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com    size_t bytesUsed() const {
108a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        return fSizeInBytes;
109a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com    }
110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com};
111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1123aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeDoc;
1133aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comconst unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
114571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Discriminates what a PdfToken carries.
enum SkPdfTokenType {
    kKeyword_TokenType,
    kObject_TokenType,
};

// PdfToken only stores a pointer to the object, so a forward declaration is sufficient.
class SkPdfNativeObject;

// A single token produced by the tokenizer.
// NOTE(review): presumably fKeyword/fKeywordLength are meaningful for kKeyword_TokenType and
// fObject for kObject_TokenType -- confirm against the tokenizer implementation.
struct PdfToken {
    const char*      fKeyword;
    size_t           fKeywordLength;
    SkPdfNativeObject*     fObject;
    SkPdfTokenType   fType;

    // Every member is initialized, including fType, so that a default-constructed token never
    // exposes an indeterminate type. kKeyword_TokenType is the first (zero-valued) enumerator.
    PdfToken()
        : fKeyword(NULL)
        , fKeywordLength(0)
        , fObject(NULL)
        , fType(kKeyword_TokenType) {}
};
128571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
129571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer {
130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic:
1313aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
1323aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
133571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1343aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com    virtual ~SkPdfNativeTokenizer();
135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool readToken(PdfToken* token);
137571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool readTokenCore(PdfToken* token);
138571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void PutBack(PdfToken token);
13978b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com    SkPdfImageDictionary* readInlineImage();
140571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate:
1423aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeDoc* fDoc;
143571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    const SkPdfMapper* fMapper;
144571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator* fAllocator;
145571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1462ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com    const unsigned char* fUncompressedStreamStart;
1472ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com    const unsigned char* fUncompressedStream;
1482ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com    const unsigned char* fUncompressedStreamEnd;
149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
150571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool fEmpty;
151571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool fHasPutBack;
152571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    PdfToken fPutBack;
1533aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com};
1543aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
1553aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#endif  // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
156