1/* 2 * Copyright 2013 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#ifndef SkPdfNativeTokenizer_DEFINED 9#define SkPdfNativeTokenizer_DEFINED 10 11#include <math.h> 12#include <string.h> 13 14#include "SkPdfConfig.h" 15#include "SkTDArray.h" 16#include "SkTDict.h" 17 18// All these constants are defined by the PDF 1.4 Spec. 19 20class SkPdfDictionary; 21class SkPdfImageDictionary; 22class SkPdfNativeDoc; 23class SkPdfNativeObject; 24 25 26// White Spaces 27#define kNUL_PdfWhiteSpace '\x00' 28#define kHT_PdfWhiteSpace '\x09' 29#define kLF_PdfWhiteSpace '\x0A' 30#define kFF_PdfWhiteSpace '\x0C' 31#define kCR_PdfWhiteSpace '\x0D' 32#define kSP_PdfWhiteSpace '\x20' 33 34// PdfDelimiters 35#define kOpenedRoundBracket_PdfDelimiter '(' 36#define kClosedRoundBracket_PdfDelimiter ')' 37#define kOpenedInequityBracket_PdfDelimiter '<' 38#define kClosedInequityBracket_PdfDelimiter '>' 39#define kOpenedSquareBracket_PdfDelimiter '[' 40#define kClosedSquareBracket_PdfDelimiter ']' 41#define kOpenedCurlyBracket_PdfDelimiter '{' 42#define kClosedCurlyBracket_PdfDelimiter '}' 43#define kNamed_PdfDelimiter '/' 44#define kComment_PdfDelimiter '%' 45 46#define kEscape_PdfSpecial '\\' 47#define kBackspace_PdfSpecial '\x08' 48 49// TODO(edisonn): what is the faster way for compiler/machine type to evaluate this expressions? 50// we should evaluate all options. might be even different from one machine to another 51// 1) expand expression, let compiler optimize it 52// 2) binary search 53// 3) linear search in array 54// 4) vector (e.f. T type[256] .. return type[ch] ... 55// 5) manually build the expression with least number of operators, e.g. for consecutive 56// chars, we can use an binary equal ignoring last bit 57#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)|| \ 58 ((ch)==kHT_PdfWhiteSpace)|| \ 59 ((ch)==kLF_PdfWhiteSpace)|| \ 60 ((ch)==kFF_PdfWhiteSpace)|| \ 61 ((ch)==kCR_PdfWhiteSpace)|| \ 62 ((ch)==kSP_PdfWhiteSpace)) 63 64#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)) 65 66 67#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\ 68 ((ch)==kClosedRoundBracket_PdfDelimiter)||\ 69 ((ch)==kOpenedInequityBracket_PdfDelimiter)||\ 70 ((ch)==kClosedInequityBracket_PdfDelimiter)||\ 71 ((ch)==kOpenedSquareBracket_PdfDelimiter)||\ 72 ((ch)==kClosedSquareBracket_PdfDelimiter)||\ 73 ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\ 74 ((ch)==kClosedCurlyBracket_PdfDelimiter)||\ 75 ((ch)==kNamed_PdfDelimiter)||\ 76 ((ch)==kComment_PdfDelimiter)) 77 78#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) 79 80#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') 81#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.') 82 83const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer, const unsigned char* end); 84const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end); 85 86#define BUFFER_SIZE 1024 87 88/** \class SkPdfAllocator 89 * 90 * An allocator only allocates memory, and it deletes it all when the allocator is destroyed. 91 * This strategy would allow us not to do any garbage collection while we parse and/or render 92 * a pdf. 93 * 94 */ 95class SkPdfAllocator { 96public: 97 SkPdfAllocator() { 98 fSizeInBytes = sizeof(*this); 99 fCurrent = allocBlock(); 100 fCurrentUsed = 0; 101 } 102 103 ~SkPdfAllocator(); 104 105 // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called. 106 SkPdfNativeObject* allocObject(); 107 108 // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called. 109 void* alloc(size_t bytes) { 110 void* data = malloc(bytes); 111 fHandles.push(data); 112 fSizeInBytes += bytes; 113 return data; 114 } 115 116 // Returns the number of bytes used in this allocator. 117 size_t bytesUsed() const { 118 return fSizeInBytes; 119 } 120 121private: 122 SkTDArray<SkPdfNativeObject*> fHistory; 123 SkTDArray<void*> fHandles; 124 SkPdfNativeObject* fCurrent; 125 int fCurrentUsed; 126 127 SkPdfNativeObject* allocBlock(); 128 size_t fSizeInBytes; 129}; 130 131// Type of a parsed token. 132enum SkPdfTokenType { 133 kKeyword_TokenType, 134 kObject_TokenType, 135}; 136 137 138/** \struct PdfToken 139 * 140 * Stores the result of the parsing - a keyword or an object. 141 * 142 */ 143struct PdfToken { 144 const char* fKeyword; 145 size_t fKeywordLength; 146 SkPdfNativeObject* fObject; 147 SkPdfTokenType fType; 148 149 PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} 150}; 151 152/** \class SkPdfNativeTokenizer 153 * 154 * Responsible to tokenize a stream in small tokens, eityh a keyword or an object. 155 * A renderer can feed on the tokens and render a pdf. 156 * 157 */ 158class SkPdfNativeTokenizer { 159public: 160 SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, 161 SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 162 SkPdfNativeTokenizer(const unsigned char* buffer, int len, 163 SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 164 165 virtual ~SkPdfNativeTokenizer(); 166 167 // Reads one token. Returns false if there are no more tokens. 168 // If writeDiff is true, and a token was read, create a PNG highlighting 169 // the difference caused by this command in /tmp/log_step_by_step. 170 // If PDF_TRACE_DIFF_IN_PNG is not defined, writeDiff does nothing. 171 bool readToken(PdfToken* token, bool writeDiff = false); 172 173 // Put back a token to be read in the nextToken read. Only one token is allowed to be put 174 // back. Must not necesaarely be the last token read. 175 void PutBack(PdfToken token); 176 177 // Reads the inline image that is present in the stream. At this point we just consumed the ID 178 // token already. 179 SkPdfImageDictionary* readInlineImage(); 180 181private: 182 bool readTokenCore(PdfToken* token); 183 184 SkPdfNativeDoc* fDoc; 185 SkPdfAllocator* fAllocator; 186 187 const unsigned char* fUncompressedStreamStart; 188 const unsigned char* fUncompressedStream; 189 const unsigned char* fUncompressedStreamEnd; 190 191 bool fEmpty; 192 bool fHasPutBack; 193 PdfToken fPutBack; 194}; 195 196const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, 197 SkPdfNativeObject* token, 198 SkPdfAllocator* allocator, 199 SkPdfNativeDoc* doc); 200 201#endif // SkPdfNativeTokenizer_DEFINED 202