SkPdfNativeTokenizer.h revision 571c70b95f56e22b5a7d6f4f288aa6c9a925a64f
13aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
23aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
33aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
#include "SkTDArray.h"
#include "SkTDict.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
8571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
9571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfMapper;
10571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfDictionary;
11571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// White-space characters: the six byte values accepted by isPdfWhiteSpace().
#define kNUL_PdfWhiteSpace '\x00'  // null
#define kHT_PdfWhiteSpace  '\x09'  // horizontal tab
#define kLF_PdfWhiteSpace  '\x0A'  // line feed
#define kFF_PdfWhiteSpace  '\x0C'  // form feed
#define kCR_PdfWhiteSpace  '\x0D'  // carriage return
#define kSP_PdfWhiteSpace  '\x20'  // space
19571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// PDF delimiter characters: the ten byte values accepted by isPdfDelimiter().
// The "InequityBracket" names denote the angle brackets '<' and '>'.
#define kOpenedRoundBracket_PdfDelimiter        '('
#define kClosedRoundBracket_PdfDelimiter        ')'
#define kOpenedInequityBracket_PdfDelimiter     '<'
#define kClosedInequityBracket_PdfDelimiter     '>'
#define kOpenedSquareBracket_PdfDelimiter       '['
#define kClosedSquareBracket_PdfDelimiter       ']'
#define kOpenedCurlyBracket_PdfDelimiter        '{'
#define kClosedCurlyBracket_PdfDelimiter        '}'
#define kNamed_PdfDelimiter                     '/'
#define kComment_PdfDelimiter                   '%'
31571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Special characters: the backslash and the backspace control character (0x08).
#define kEscape_PdfSpecial                      '\\'
#define kBackspace_PdfSpecial                   '\x08'
34571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// TODO(edisonn): what is the fastest way for the compiler/machine to evaluate these expressions?
// We should evaluate all options; the answer might even differ from one machine to another.
// 1) expand the expression and let the compiler optimize it
// 2) binary search
// 3) linear search in an array
// 4) lookup vector (e.g. T type[256] ... return type[ch] ...)
// 5) manually build the expression with the least number of operators, e.g. for consecutive
// chars, we can use a binary equality test that ignores the last bit
// True when ch equals one of the six k*_PdfWhiteSpace constants.
// NOTE: this is a function-like macro and ch is expanded six times, so the argument must be free
// of side effects (do not pass something like *p++).
#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)||\
                             ((ch)==kHT_PdfWhiteSpace)||\
                             ((ch)==kLF_PdfWhiteSpace)||\
                             ((ch)==kFF_PdfWhiteSpace)||\
                             ((ch)==kCR_PdfWhiteSpace)||\
                             ((ch)==kSP_PdfWhiteSpace))
44571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// True when ch is a line feed or a carriage return.
// NOTE: function-like macro; ch is expanded twice, so the argument must be free of side effects.
#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace))
46571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
47571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// True when ch equals one of the ten k*_PdfDelimiter constants.
// NOTE: function-like macro; ch is expanded ten times, so the argument must be free of side
// effects (do not pass something like *p++).
#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\
                            ((ch)==kClosedRoundBracket_PdfDelimiter)||\
                            ((ch)==kOpenedInequityBracket_PdfDelimiter)||\
                            ((ch)==kClosedInequityBracket_PdfDelimiter)||\
                            ((ch)==kOpenedSquareBracket_PdfDelimiter)||\
                            ((ch)==kClosedSquareBracket_PdfDelimiter)||\
                            ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\
                            ((ch)==kClosedCurlyBracket_PdfDelimiter)||\
                            ((ch)==kNamed_PdfDelimiter)||\
                            ((ch)==kComment_PdfDelimiter))
58571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// True when ch satisfies isPdfWhiteSpace() or isPdfDelimiter().
// NOTE: expands ch once per comparison in both macros, so the argument must be free of side
// effects.
#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch))
60571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// True for the ASCII decimal digits '0'..'9'.
// NOTE: function-like macro; ch is expanded twice, so the argument must be free of side effects.
#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9')
// True for every character that may appear in a PDF numeric object: a decimal digit, a sign, or
// the decimal point. The point is included because a PDF real may begin with it (e.g. ".5").
// NOTE: function-like macro; ch is expanded several times, so it must be free of side effects.
#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')
63571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Low-level scanning helpers over a raw byte buffer of len bytes; defined outside this header.
// NOTE(review): judging by the names, each presumably returns a pointer just past the skipped
// white space / token / comment and never beyond the first len bytes - confirm against the
// definitions before relying on it.
unsigned char* skipPdfWhiteSpaces(unsigned char* buffer, size_t len);
unsigned char* endOfPdfToken(unsigned char* start, size_t len);
unsigned char* skipPdfComment(unsigned char* start, size_t len);
67571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// TODO(edisonn): typedef the real and integer types? It might make the code less readable...
//typedef double SkPdfReal;
//typedef int64_t SkPdfInteger;
71571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// An allocator only allocates memory, and it deletes it all when the allocator is destroyed.
// This allows us not to do any garbage collection while we parse or draw a pdf, and to defer it
// until the user is looking at the image.
75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
76571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfObject;
77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator {
79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define BUFFER_SIZE 1024
80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkTDArray<SkPdfObject*> fHistory;
81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkTDArray<void*> fHandles;
82571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfObject* fCurrent;
83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    int fCurrentUsed;
84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
85571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfObject* allocBlock();
86571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
873aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic:
88571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator() {
89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fCurrent = allocBlock();
90571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fCurrentUsed = 0;
91571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
92571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    ~SkPdfAllocator();
94571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfObject* allocObject();
96571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
97571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    // TODO(edisonn): free this memory in destructor, track the usage?
98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void* alloc(size_t bytes) {
99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        void* data = malloc(bytes);
100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fHandles.push(data);
101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        return data;
102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
103571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com};
104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comunsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator);
106571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Discriminates the two kinds of token a PdfToken can describe.
// No trailing comma after the last enumerator: that is only well-formed from C++11 on.
enum SkPdfTokenType {
    kKeyword_TokenType,  // value 0
    kObject_TokenType    // value 1
};
111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
112571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comstruct PdfToken {
113571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    const char*      fKeyword;
114571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    size_t           fKeywordLength;
115571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfObject*     fObject;
116571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfTokenType   fType;
117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
118571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {}
119571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com};
120571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
121571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer {
122571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic:
123571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator);
124571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator);
125571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1263aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com    virtual ~SkPdfNativeTokenizer();
127571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
128571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool readToken(PdfToken* token);
129571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool readTokenCore(PdfToken* token);
130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void PutBack(PdfToken token);
131571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
132571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate:
133571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    const SkPdfMapper* fMapper;
134571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator* fAllocator;
135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    unsigned char* fUncompressedStreamStart;
137571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    unsigned char* fUncompressedStream;
138571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    unsigned char* fUncompressedStreamEnd;
139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
140571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool fEmpty;
141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool fHasPutBack;
142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    PdfToken fPutBack;
1433aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com};
1443aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
1453aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#endif  // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
146