// SkPdfNativeTokenizer.h revision 3aa355527a3b91d3e12b8bee49e5637d00a736ca
13aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 23aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 33aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com 4571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDArray.h" 5571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDict.h" 6571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <math.h> 7571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <string.h> 8571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 9571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfMapper; 10571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfDictionary; 1178b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.comclass SkPdfImageDictionary; 12571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 13571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// White Spaces 14571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kNUL_PdfWhiteSpace '\x00' 15571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kHT_PdfWhiteSpace '\x09' 16571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kLF_PdfWhiteSpace '\x0A' 17571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kFF_PdfWhiteSpace '\x0C' 18571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kCR_PdfWhiteSpace '\x0D' 19571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kSP_PdfWhiteSpace '\x20' 20571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 21571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// PdfDelimiters 22571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedRoundBracket_PdfDelimiter '(' 23571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedRoundBracket_PdfDelimiter ')' 
24571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedInequityBracket_PdfDelimiter '<' 25571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedInequityBracket_PdfDelimiter '>' 26571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedSquareBracket_PdfDelimiter '[' 27571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedSquareBracket_PdfDelimiter ']' 28571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedCurlyBracket_PdfDelimiter '{' 29571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedCurlyBracket_PdfDelimiter '}' 30571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kNamed_PdfDelimiter '/' 31571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kComment_PdfDelimiter '%' 32571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 33571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kEscape_PdfSpecial '\\' 34571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kBackspace_PdfSpecial '\x08' 35571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 36571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// TODO(edisonn): what is the faster way for compiler/machine type to evaluate this expressions? 37571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// we should evaluate all options. might be even different from one machine to another 38571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 1) expand expression, let compiler optimize it 39571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 2) binary search 40571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 3) linear search in array 41571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 4) vector (e.f. T type[256] .. return type[ch] ... 42571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 5) manually build the expression with least number of operators, e.g. 
for consecutive 43571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// chars, we can use an binary equal ignoring last bit 44571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)||((ch)==kHT_PdfWhiteSpace)||((ch)==kLF_PdfWhiteSpace)||((ch)==kFF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)||((ch)==kSP_PdfWhiteSpace)) 45571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 46571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)) 47571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 48571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 49571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\ 50571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedRoundBracket_PdfDelimiter)||\ 51571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedInequityBracket_PdfDelimiter)||\ 52571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedInequityBracket_PdfDelimiter)||\ 53571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedSquareBracket_PdfDelimiter)||\ 54571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedSquareBracket_PdfDelimiter)||\ 55571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\ 56571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedCurlyBracket_PdfDelimiter)||\ 57571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kNamed_PdfDelimiter)||\ 58571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kComment_PdfDelimiter)) 59571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 60571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) 
61571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 62571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') 634ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.com#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.') 64571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 654ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.comconst unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* buffer, const unsigned char* end); 664ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.comconst unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end); 67571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 68571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// TODO(edisonn): typedef read and integer tyepes? make less readable... 69571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//typedef double SkPdfReal; 70571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//typedef int64_t SkPdfInteger; 71571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 72571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// an allocator only allocates memory, and it deletes it all when the allocator is destroyed 73571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// this would allow us not to do any garbage collection while we parse or draw a pdf, and defere it 74571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// while the user is looking at the image 75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 763aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeObject; 77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator { 79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define BUFFER_SIZE 1024 803aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com 
SkTDArray<SkPdfNativeObject*> fHistory; 81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkTDArray<void*> fHandles; 823aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeObject* fCurrent; 83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com int fCurrentUsed; 84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 853aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeObject* allocBlock(); 86a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com size_t fSizeInBytes; 87571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 883aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic: 89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfAllocator() { 90a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com fSizeInBytes = sizeof(*this); 91571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fCurrent = allocBlock(); 92571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fCurrentUsed = 0; 93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 94571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ~SkPdfAllocator(); 96571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 973aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeObject* allocObject(); 98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TODO(edisonn): free this memory in destructor, track the usage? 
100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void* alloc(size_t bytes) { 101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void* data = malloc(bytes); 102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fHandles.push(data); 103a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com fSizeInBytes += bytes; 104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com return data; 105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 106a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com 1077b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com size_t bytesUsed() const { 108a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com return fSizeInBytes; 109a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com } 110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1123aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeDoc; 1133aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comconst unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 114571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 115571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comenum SkPdfTokenType { 116571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com kKeyword_TokenType, 117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com kObject_TokenType, 118571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 119571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 120571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comstruct PdfToken { 121571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const char* fKeyword; 122571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com size_t fKeywordLength; 1233aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeObject* fObject; 
124571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfTokenType fType; 125571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 126571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} 127571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 128571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 129571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer { 130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic: 1313aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 1323aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 133571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1343aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com virtual ~SkPdfNativeTokenizer(); 135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool readToken(PdfToken* token); 137571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool readTokenCore(PdfToken* token); 138571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void PutBack(PdfToken token); 13978b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com SkPdfImageDictionary* readInlineImage(); 140571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate: 1423aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com SkPdfNativeDoc* fDoc; 143571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const SkPdfMapper* fMapper; 144571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfAllocator* fAllocator; 
145571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1462ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com const unsigned char* fUncompressedStreamStart; 1472ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com const unsigned char* fUncompressedStream; 1482ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com const unsigned char* fUncompressedStreamEnd; 149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 150571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool fEmpty; 151571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool fHasPutBack; 152571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com PdfToken fPutBack; 1533aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com}; 1543aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com 1553aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 156