// SkPdfNativeTokenizer.h revision cf2cfa174ca878c144e17e9fc60ca8e9070d7ded
1cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com/* 2cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * Copyright 2013 Google Inc. 3cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * 4cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * Use of this source code is governed by a BSD-style license that can be 5cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * found in the LICENSE file. 6cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com */ 7cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com 8cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#ifndef SkPdfNativeTokenizer_DEFINED 9cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#define SkPdfNativeTokenizer_DEFINED 103aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com 11571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDArray.h" 12571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDict.h" 13571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <math.h> 14571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <string.h> 15571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 16571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfDictionary; 1778b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.comclass SkPdfImageDictionary; 18571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 19571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// White Spaces 20571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kNUL_PdfWhiteSpace '\x00' 21571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kHT_PdfWhiteSpace '\x09' 22571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kLF_PdfWhiteSpace '\x0A' 23571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kFF_PdfWhiteSpace '\x0C' 24571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kCR_PdfWhiteSpace '\x0D' 
#define kSP_PdfWhiteSpace  '\x20'

// PdfDelimiters
#define kOpenedRoundBracket_PdfDelimiter        '('
#define kClosedRoundBracket_PdfDelimiter        ')'
#define kOpenedInequityBracket_PdfDelimiter     '<'
#define kClosedInequityBracket_PdfDelimiter     '>'
#define kOpenedSquareBracket_PdfDelimiter       '['
#define kClosedSquareBracket_PdfDelimiter       ']'
#define kOpenedCurlyBracket_PdfDelimiter        '{'
#define kClosedCurlyBracket_PdfDelimiter        '}'
#define kNamed_PdfDelimiter                     '/'
#define kComment_PdfDelimiter                   '%'

#define kEscape_PdfSpecial                      '\\'
#define kBackspace_PdfSpecial                   '\x08'

// TODO(edisonn): what is the fastest way for a given compiler/machine to evaluate these
// expressions? We should evaluate all options; the answer might even differ from one machine
// to another.
// 1) expand expression, let compiler optimize it
// 2) binary search
// 3) linear search in array
// 4) vector / lookup table (e.g. T type[256] ... return type[ch])
// 5) manually build the expression with the least number of operators, e.g. for consecutive
//    chars, we can use a binary equal ignoring the last bit
//
// NOTE: every classification macro below expands its argument more than once, so the argument
// must be free of side effects (do not write isPdfWhiteSpace(*p++)).

// True when ch is one of the six white-space characters: NUL, HT, LF, FF, CR or SP.
#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)||\
                             ((ch)==kHT_PdfWhiteSpace)||\
                             ((ch)==kLF_PdfWhiteSpace)||\
                             ((ch)==kFF_PdfWhiteSpace)||\
                             ((ch)==kCR_PdfWhiteSpace)||\
                             ((ch)==kSP_PdfWhiteSpace))

// True when ch is a line feed or a carriage return.
#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace))

// True when ch is one of the ten delimiter characters: ( ) < > [ ] { } / %
#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\
                            ((ch)==kClosedRoundBracket_PdfDelimiter)||\
                            ((ch)==kOpenedInequityBracket_PdfDelimiter)||\
                            ((ch)==kClosedInequityBracket_PdfDelimiter)||\
                            ((ch)==kOpenedSquareBracket_PdfDelimiter)||\
                            ((ch)==kClosedSquareBracket_PdfDelimiter)||\
                            ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\
                            ((ch)==kClosedCurlyBracket_PdfDelimiter)||\
                            ((ch)==kNamed_PdfDelimiter)||\
                            ((ch)==kComment_PdfDelimiter))

// True when ch is either a white-space character or a delimiter.
#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch))

// True when ch is an ASCII decimal digit.
#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9')
// True when ch is a digit, a sign ('+' or '-') or a decimal point.
#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')

// Scanning helpers, defined elsewhere.
// NOTE(review): presumably both scan the byte range [buffer/start, end) and return the position
// where the scan stopped; the meaning of `level` is not visible from this header - confirm
// against the definitions.
const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* buffer, const unsigned char* end);
const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end);

// TODO(edisonn): typedef real and integer types? It would make the code less readable...
75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//typedef double SkPdfReal; 76571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//typedef int64_t SkPdfInteger; 77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// an allocator only allocates memory, and it deletes it all when the allocator is destroyed 79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// this would allow us not to do any garbage collection while we parse or draw a pdf, and defere it 80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// while the user is looking at the image 81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 823aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeObject; 83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator { 85571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define BUFFER_SIZE 1024 863aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkTDArray<SkPdfNativeObject*> fHistory; 87571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkTDArray<void*> fHandles; 883aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeObject* fCurrent; 89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com int fCurrentUsed; 90571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 913aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeObject* allocBlock(); 92a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com size_t fSizeInBytes; 93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 943aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic: 95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfAllocator() { 96a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com fSizeInBytes = sizeof(*this); 97571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fCurrent = 
allocBlock(); 98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fCurrentUsed = 0; 99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ~SkPdfAllocator(); 102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1033aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeObject* allocObject(); 104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TODO(edisonn): free this memory in destructor, track the usage? 106571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void* alloc(size_t bytes) { 107571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void* data = malloc(bytes); 108571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fHandles.push(data); 109a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com fSizeInBytes += bytes; 110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com return data; 111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 112a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com 1137b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com size_t bytesUsed() const { 114a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com return fSizeInBytes; 115a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com } 116571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1183aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeDoc; 1193aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comconst unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 120571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 
121571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comenum SkPdfTokenType { 122571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com kKeyword_TokenType, 123571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com kObject_TokenType, 124571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 125571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 126571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comstruct PdfToken { 127571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const char* fKeyword; 128571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com size_t fKeywordLength; 1293aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeObject* fObject; 130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfTokenType fType; 131571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 132571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} 133571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 134571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer { 136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic: 13733f11b6fcdb7dfce27f953803be40fbacedc7450edisonn@google.com SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 13833f11b6fcdb7dfce27f953803be40fbacedc7450edisonn@google.com SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1403aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com virtual ~SkPdfNativeTokenizer(); 141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool readToken(PdfToken* token); 143571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool 
readTokenCore(PdfToken* token); 144571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void PutBack(PdfToken token); 14578b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com SkPdfImageDictionary* readInlineImage(); 146571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 147571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate: 1483aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeDoc* fDoc; 149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfAllocator* fAllocator; 150571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1512ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com const unsigned char* fUncompressedStreamStart; 1522ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com const unsigned char* fUncompressedStream; 1532ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com const unsigned char* fUncompressedStreamEnd; 154571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 155571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool fEmpty; 156571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool fHasPutBack; 157571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com PdfToken fPutBack; 1583aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com}; 1593aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com 160cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#endif // SkPdfNativeTokenizer_DEFINED 161