SkPdfNativeTokenizer.h revision cf2cfa174ca878c144e17e9fc60ca8e9070d7ded
/*
 * Copyright 2013 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com
8cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#ifndef SkPdfNativeTokenizer_DEFINED
9cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#define SkPdfNativeTokenizer_DEFINED
103aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
#include "SkTDArray.h"
#include "SkTDict.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
15571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Forward declarations of types defined elsewhere in the project; only pointers to them are
// needed in this header.
class SkPdfDictionary;
class SkPdfImageDictionary;
18571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// White Spaces
// The six byte values that isPdfWhiteSpace() accepts.
#define kNUL_PdfWhiteSpace '\x00'  // null
#define kHT_PdfWhiteSpace  '\x09'  // horizontal tab
#define kLF_PdfWhiteSpace  '\x0A'  // line feed
#define kFF_PdfWhiteSpace  '\x0C'  // form feed
#define kCR_PdfWhiteSpace  '\x0D'  // carriage return
#define kSP_PdfWhiteSpace  '\x20'  // space
26571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// PdfDelimiters
// The ten characters that isPdfDelimiter() accepts.
#define kOpenedRoundBracket_PdfDelimiter        '('
#define kClosedRoundBracket_PdfDelimiter        ')'
#define kOpenedInequityBracket_PdfDelimiter     '<'
#define kClosedInequityBracket_PdfDelimiter     '>'
#define kOpenedSquareBracket_PdfDelimiter       '['
#define kClosedSquareBracket_PdfDelimiter       ']'
#define kOpenedCurlyBracket_PdfDelimiter        '{'
#define kClosedCurlyBracket_PdfDelimiter        '}'
#define kNamed_PdfDelimiter                     '/'
#define kComment_PdfDelimiter                   '%'

// Special characters that are neither white space nor delimiters.
#define kEscape_PdfSpecial                      '\\'    // backslash
#define kBackspace_PdfSpecial                   '\x08'  // backspace
41571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// TODO(edisonn): what is the fastest way for a given compiler/machine to evaluate these
// expressions? We should evaluate all options; the answer might even differ from one machine
// to another.
// 1) expand the expression, let the compiler optimize it
// 2) binary search
// 3) linear search in an array
// 4) vector (e.g. T type[256] .. return type[ch] ...)
// 5) manually build the expression with the least number of operators, e.g. for consecutive
//    chars, we can use a binary equality test that ignores the last bit

// Non-zero when ch equals one of the six k*_PdfWhiteSpace constants (NUL, HT, LF, FF, CR, SP).
// NOTE: this is a macro and ch is evaluated up to six times; do not pass an expression with
// side effects (e.g. *p++).
#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)||\
                             ((ch)==kHT_PdfWhiteSpace)||\
                             ((ch)==kLF_PdfWhiteSpace)||\
                             ((ch)==kFF_PdfWhiteSpace)||\
                             ((ch)==kCR_PdfWhiteSpace)||\
                             ((ch)==kSP_PdfWhiteSpace))
51571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Non-zero when ch is a line feed or a carriage return.
// NOTE: macro; ch may be evaluated twice.
#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace))
53571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
54571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Non-zero when ch equals one of the ten k*_PdfDelimiter constants: ( ) < > [ ] { } / %
// NOTE: macro; ch is evaluated up to ten times, so do not pass an expression with side effects.
#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\
                            ((ch)==kClosedRoundBracket_PdfDelimiter)||\
                            ((ch)==kOpenedInequityBracket_PdfDelimiter)||\
                            ((ch)==kClosedInequityBracket_PdfDelimiter)||\
                            ((ch)==kOpenedSquareBracket_PdfDelimiter)||\
                            ((ch)==kClosedSquareBracket_PdfDelimiter)||\
                            ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\
                            ((ch)==kClosedCurlyBracket_PdfDelimiter)||\
                            ((ch)==kNamed_PdfDelimiter)||\
                            ((ch)==kComment_PdfDelimiter))
65571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Non-zero when ch satisfies isPdfWhiteSpace() or isPdfDelimiter().
// NOTE: macro; ch is evaluated many times through the two nested macros.
#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch))
67571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Non-zero when ch is one of the characters '0'..'9'.
// NOTE: macro; ch is evaluated twice.
#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9')

// Non-zero when ch is a digit, a sign ('+' or '-') or a decimal point ('.').
// NOTE: macro; ch is evaluated up to five times.
#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')
70571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Declared here, defined elsewhere.
// NOTE(review): judging by the name and signature only, this presumably advances past PDF white
// space starting at buffer without going beyond end, and returns the new position; level looks
// like a nesting/recursion depth. Confirm against the implementation.
const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* buffer, const unsigned char* end);
// Declared here, defined elsewhere.
// NOTE(review): judging by the name and signature only, this presumably returns the position
// just past the token that begins at start, never going beyond end; level looks like a
// nesting/recursion depth. Confirm against the implementation.
const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end);
73571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// TODO(edisonn): typedef real and integer types? It might make the code less readable...
//typedef double SkPdfReal;
//typedef int64_t SkPdfInteger;
77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// an allocator only allocates memory, and it deletes it all when the allocator is destroyed
79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// this would allow us not to do any garbage collection while we parse or draw a pdf, and defere it
80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// while the user is looking at the image
81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
823aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeObject;
83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator {
85571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define BUFFER_SIZE 1024
863aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkTDArray<SkPdfNativeObject*> fHistory;
87571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkTDArray<void*> fHandles;
883aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* fCurrent;
89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    int fCurrentUsed;
90571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
913aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* allocBlock();
92a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com    size_t fSizeInBytes;
93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
943aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic:
95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator() {
96a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        fSizeInBytes = sizeof(*this);
97571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fCurrent = allocBlock();
98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fCurrentUsed = 0;
99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    ~SkPdfAllocator();
102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1033aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* allocObject();
104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    // TODO(edisonn): free this memory in destructor, track the usage?
106571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void* alloc(size_t bytes) {
107571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        void* data = malloc(bytes);
108571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        fHandles.push(data);
109a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        fSizeInBytes += bytes;
110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        return data;
111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
112a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com
1137b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com    size_t bytesUsed() const {
114a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        return fSizeInBytes;
115a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com    }
116571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com};
117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1183aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeDoc;
1193aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comconst unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
120571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
// Discriminator stored in PdfToken::fType.
enum SkPdfTokenType {
    kKeyword_TokenType,
    kObject_TokenType
};

// Repeats the forward declaration made earlier in this header so that this section stands on
// its own; redeclaring a class is harmless.
class SkPdfNativeObject;

// One token: either a keyword (fKeyword/fKeywordLength) or an object (fObject).
// NOTE(review): fKeyword comes with an explicit length, so it is presumably not guaranteed to be
// NUL-terminated - confirm at the places that fill it in.
struct PdfToken {
    const char*        fKeyword;
    size_t             fKeywordLength;
    SkPdfNativeObject* fObject;
    SkPdfTokenType     fType;

    // A default-constructed token has no keyword and no object. fType is set as well (to the
    // first enumerator) so that reading it before a tokenizer fills the token in is well defined.
    PdfToken()
        : fKeyword(NULL)
        , fKeywordLength(0)
        , fObject(NULL)
        , fType(kKeyword_TokenType) {}
};
134571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer {
136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic:
13733f11b6fcdb7dfce27f953803be40fbacedc7450edisonn@google.com    SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
13833f11b6fcdb7dfce27f953803be40fbacedc7450edisonn@google.com    SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1403aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com    virtual ~SkPdfNativeTokenizer();
141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool readToken(PdfToken* token);
143571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool readTokenCore(PdfToken* token);
144571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void PutBack(PdfToken token);
14578b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com    SkPdfImageDictionary* readInlineImage();
146571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
147571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate:
1483aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeDoc* fDoc;
149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator* fAllocator;
150571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1512ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com    const unsigned char* fUncompressedStreamStart;
1522ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com    const unsigned char* fUncompressedStream;
1532ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com    const unsigned char* fUncompressedStreamEnd;
154571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
155571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool fEmpty;
156571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    bool fHasPutBack;
157571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    PdfToken fPutBack;
1583aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com};
1593aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
160cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#endif  // SkPdfNativeTokenizer_DEFINED
161