SkPdfNativeTokenizer.h revision 571c70b95f56e22b5a7d6f4f288aa6c9a925a64f
13aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 23aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 33aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com 4571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDArray.h" 5571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDict.h" 6571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <math.h> 7571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include <string.h> 8571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 9571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfMapper; 10571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfDictionary; 11571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 12571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// White Spaces 13571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kNUL_PdfWhiteSpace '\x00' 14571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kHT_PdfWhiteSpace '\x09' 15571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kLF_PdfWhiteSpace '\x0A' 16571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kFF_PdfWhiteSpace '\x0C' 17571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kCR_PdfWhiteSpace '\x0D' 18571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kSP_PdfWhiteSpace '\x20' 19571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 20571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// PdfDelimiters 21571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedRoundBracket_PdfDelimiter '(' 22571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedRoundBracket_PdfDelimiter ')' 23571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedInequityBracket_PdfDelimiter '<' 24571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedInequityBracket_PdfDelimiter '>' 25571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedSquareBracket_PdfDelimiter '[' 26571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedSquareBracket_PdfDelimiter ']' 27571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kOpenedCurlyBracket_PdfDelimiter '{' 28571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kClosedCurlyBracket_PdfDelimiter '}' 29571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kNamed_PdfDelimiter '/' 30571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kComment_PdfDelimiter '%' 31571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 32571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kEscape_PdfSpecial '\\' 33571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define kBackspace_PdfSpecial '\x08' 34571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 35571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// TODO(edisonn): what is the faster way for compiler/machine type to evaluate this expressions? 36571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// we should evaluate all options. might be even different from one machine to another 37571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 1) expand expression, let compiler optimize it 38571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 2) binary search 39571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 3) linear search in array 40571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 4) vector (e.f. T type[256] .. return type[ch] ... 41571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// 5) manually build the expression with least number of operators, e.g. for consecutive 42571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// chars, we can use an binary equal ignoring last bit 43571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)||((ch)==kHT_PdfWhiteSpace)||((ch)==kLF_PdfWhiteSpace)||((ch)==kFF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)||((ch)==kSP_PdfWhiteSpace)) 44571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 45571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)) 46571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 47571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 48571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\ 49571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedRoundBracket_PdfDelimiter)||\ 50571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedInequityBracket_PdfDelimiter)||\ 51571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedInequityBracket_PdfDelimiter)||\ 52571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedSquareBracket_PdfDelimiter)||\ 53571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedSquareBracket_PdfDelimiter)||\ 54571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\ 55571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kClosedCurlyBracket_PdfDelimiter)||\ 56571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kNamed_PdfDelimiter)||\ 57571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ((ch)==kComment_PdfDelimiter)) 58571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 59571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) 60571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 61571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') 62571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-') 63571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 64571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comunsigned char* skipPdfWhiteSpaces(unsigned char* buffer, size_t len); 65571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comunsigned char* endOfPdfToken(unsigned char* start, size_t len); 66571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comunsigned char* skipPdfComment(unsigned char* start, size_t len); 67571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 68571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// TODO(edisonn): typedef read and integer tyepes? make less readable... 69571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//typedef double SkPdfReal; 70571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//typedef int64_t SkPdfInteger; 71571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 72571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// an allocator only allocates memory, and it deletes it all when the allocator is destroyed 73571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// this would allow us not to do any garbage collection while we parse or draw a pdf, and defere it 74571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// while the user is looking at the image 75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 76571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfObject; 77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator { 79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#define BUFFER_SIZE 1024 80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkTDArray<SkPdfObject*> fHistory; 81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkTDArray<void*> fHandles; 82571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfObject* fCurrent; 83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com int fCurrentUsed; 84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 85571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfObject* allocBlock(); 86571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 873aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic: 88571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfAllocator() { 89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fCurrent = allocBlock(); 90571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fCurrentUsed = 0; 91571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 92571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ~SkPdfAllocator(); 94571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfObject* allocObject(); 96571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 97571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TODO(edisonn): free this memory in destructor, track the usage? 98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void* alloc(size_t bytes) { 99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void* data = malloc(bytes); 100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com fHandles.push(data); 101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com return data; 102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 103571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comunsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator); 106571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 107571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comenum SkPdfTokenType { 108571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com kKeyword_TokenType, 109571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com kObject_TokenType, 110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 112571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comstruct PdfToken { 113571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const char* fKeyword; 114571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com size_t fKeywordLength; 115571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfObject* fObject; 116571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfTokenType fType; 117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 118571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} 119571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 120571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 121571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer { 122571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic: 123571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator); 124571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator); 125571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 1263aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com virtual ~SkPdfNativeTokenizer(); 127571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 128571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool readToken(PdfToken* token); 129571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool readTokenCore(PdfToken* token); 130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com void PutBack(PdfToken token); 131571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 132571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate: 133571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const SkPdfMapper* fMapper; 134571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com SkPdfAllocator* fAllocator; 135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned char* fUncompressedStreamStart; 137571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned char* fUncompressedStream; 138571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned char* fUncompressedStreamEnd; 139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 140571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool fEmpty; 141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool fHasPutBack; 142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com PdfToken fPutBack; 1433aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com}; 1443aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com 1453aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com#endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ 146