SkPdfNativeTokenizer.h revision 7b328fddf94eea5f05ffa36de02d7d9922f504da
13aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 23aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 33aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com 4571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDArray.h" 5571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDict.h" 6571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <math.h> 7571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <string.h> 8571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 9571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfMapper; 10571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfDictionary; 11571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 12571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// White Spaces 13571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kNUL_PdfWhiteSpace '\x00' 14571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kHT_PdfWhiteSpace '\x09' 15571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kLF_PdfWhiteSpace '\x0A' 16571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kFF_PdfWhiteSpace '\x0C' 17571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kCR_PdfWhiteSpace '\x0D' 18571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kSP_PdfWhiteSpace '\x20' 19571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 20571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// PdfDelimiters 21571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedRoundBracket_PdfDelimiter '(' 22571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedRoundBracket_PdfDelimiter ')' 23571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedInequityBracket_PdfDelimiter '<' 24571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedInequityBracket_PdfDelimiter '>' 25571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedSquareBracket_PdfDelimiter '[' 26571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedSquareBracket_PdfDelimiter ']' 27571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedCurlyBracket_PdfDelimiter '{' 28571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedCurlyBracket_PdfDelimiter '}' 29571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kNamed_PdfDelimiter '/' 30571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kComment_PdfDelimiter '%' 31571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 32571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kEscape_PdfSpecial '\\' 33571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kBackspace_PdfSpecial '\x08' 34571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 35571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// TODO(edisonn): what is the faster way for compiler/machine type to evaluate this expressions? 36571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// we should evaluate all options. might be even different from one machine to another 37571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 1) expand expression, let compiler optimize it 38571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 2) binary search 39571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 3) linear search in array 40571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 4) vector (e.f. T type[256] .. return type[ch] ... 41571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 5) manually build the expression with least number of operators, e.g. for consecutive 42571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// chars, we can use an binary equal ignoring last bit 43571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)||((ch)==kHT_PdfWhiteSpace)||((ch)==kLF_PdfWhiteSpace)||((ch)==kFF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)||((ch)==kSP_PdfWhiteSpace)) 44571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 45571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)) 46571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 47571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 48571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\ 49571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedRoundBracket_PdfDelimiter)||\ 50571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedInequityBracket_PdfDelimiter)||\ 51571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedInequityBracket_PdfDelimiter)||\ 52571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedSquareBracket_PdfDelimiter)||\ 53571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedSquareBracket_PdfDelimiter)||\ 54571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\ 55571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedCurlyBracket_PdfDelimiter)||\ 56571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kNamed_PdfDelimiter)||\ 57571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kComment_PdfDelimiter)) 58571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 59571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) 60571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 61571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') 62571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-') 63571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 64571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comunsigned char* skipPdfWhiteSpaces(unsigned char* buffer, size_t len); 65571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comunsigned char* endOfPdfToken(unsigned char* start, size_t len); 66571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comunsigned char* skipPdfComment(unsigned char* start, size_t len); 67571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 68571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// TODO(edisonn): typedef read and integer tyepes? make less readable... 69571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//typedef double SkPdfReal; 70571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//typedef int64_t SkPdfInteger; 71571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 72571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// an allocator only allocates memory, and it deletes it all when the allocator is destroyed 73571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// this would allow us not to do any garbage collection while we parse or draw a pdf, and defere it 74571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// while the user is looking at the image 75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 76571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfObject; 77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator { 79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define BUFFER_SIZE 1024 80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkTDArray<SkPdfObject*> fHistory; 81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkTDArray<void*> fHandles; 82571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfObject* fCurrent; 83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com int fCurrentUsed; 84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 85571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfObject* allocBlock(); 86a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com size_t fSizeInBytes; 87571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 883aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic: 89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfAllocator() { 90a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com fSizeInBytes = sizeof(*this); 91571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fCurrent = allocBlock(); 92571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fCurrentUsed = 0; 93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 94571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ~SkPdfAllocator(); 96571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 97571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfObject* allocObject(); 98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TODO(edisonn): free this memory in destructor, track the usage? 100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void* alloc(size_t bytes) { 101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void* data = malloc(bytes); 102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fHandles.push(data); 103a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com fSizeInBytes += bytes; 104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com return data; 105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 106a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com 1077b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com size_t bytesUsed() const { 108a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com return fSizeInBytes; 109a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com } 110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 112951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.comclass SkNativeParsedPDF; 113951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.comunsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc); 114571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 115571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comenum SkPdfTokenType { 116571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com kKeyword_TokenType, 117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com kObject_TokenType, 118571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 119571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 120571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comstruct PdfToken { 121571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const char* fKeyword; 122571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com size_t fKeywordLength; 123571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfObject* fObject; 124571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfTokenType fType; 125571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 126571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} 127571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 128571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 129571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer { 130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic: 131951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.com SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc); 132951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.com SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc); 133571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1343aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com virtual ~SkPdfNativeTokenizer(); 135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool readToken(PdfToken* token); 137571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool readTokenCore(PdfToken* token); 138571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void PutBack(PdfToken token); 139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 140571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate: 141951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.com SkNativeParsedPDF* fDoc; 142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const SkPdfMapper* fMapper; 143571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfAllocator* fAllocator; 144571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 145571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned char* fUncompressedStreamStart; 146571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned char* fUncompressedStream; 147571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned char* fUncompressedStreamEnd; 148571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool fEmpty; 150571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool fHasPutBack; 151571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com PdfToken fPutBack; 1523aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com}; 1533aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com 1543aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 155