1/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkPdfNativeTokenizer_DEFINED
9#define SkPdfNativeTokenizer_DEFINED
10
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "SkPdfConfig.h"
#include "SkTDArray.h"
#include "SkTDict.h"
17
18// All these constants are defined by the PDF 1.4 Spec.
19
20class SkPdfDictionary;
21class SkPdfImageDictionary;
22class SkPdfNativeDoc;
23class SkPdfNativeObject;
24
25
// White-space characters, spelled with their standard C escape sequences.
#define kNUL_PdfWhiteSpace '\0'   // 0x00
#define kHT_PdfWhiteSpace  '\t'   // 0x09
#define kLF_PdfWhiteSpace  '\n'   // 0x0A
#define kFF_PdfWhiteSpace  '\f'   // 0x0C
#define kCR_PdfWhiteSpace  '\r'   // 0x0D
#define kSP_PdfWhiteSpace  ' '    // 0x20

// Delimiter characters.
#define kOpenedRoundBracket_PdfDelimiter     '('   // 0x28
#define kClosedRoundBracket_PdfDelimiter     ')'   // 0x29
#define kOpenedInequityBracket_PdfDelimiter  '<'   // 0x3C
#define kClosedInequityBracket_PdfDelimiter  '>'   // 0x3E
#define kOpenedSquareBracket_PdfDelimiter    '['   // 0x5B
#define kClosedSquareBracket_PdfDelimiter    ']'   // 0x5D
#define kOpenedCurlyBracket_PdfDelimiter     '{'   // 0x7B
#define kClosedCurlyBracket_PdfDelimiter     '}'   // 0x7D
#define kNamed_PdfDelimiter                  '/'   // 0x2F
#define kComment_PdfDelimiter                '%'   // 0x25

// Other special characters.
#define kEscape_PdfSpecial                   '\\'  // 0x5C
#define kBackspace_PdfSpecial                '\b'  // 0x08
48
// TODO(edisonn): what is the fastest way for a given compiler/machine to evaluate these
// expressions? We should evaluate all options; the answer might even differ from one machine
// to another:
// 1) expand the expression and let the compiler optimize it
// 2) binary search
// 3) linear search in an array
// 4) lookup table (e.g. T type[256] ... return type[ch])
// 5) manually build the expression with the least number of operators, e.g. for consecutive
//    chars we can use a binary equality test that ignores the last bit
//
// isPdfWhiteSpace(ch): true when ch equals one of the six white-space constants
// (NUL, HT, LF, FF, CR, SP). This is a function-like macro, so ch can be evaluated
// more than once; do not pass an expression with side effects.
#define isPdfWhiteSpace(ch) ((kNUL_PdfWhiteSpace == (ch)) || \
                             (kHT_PdfWhiteSpace  == (ch)) || \
                             (kLF_PdfWhiteSpace  == (ch)) || \
                             (kFF_PdfWhiteSpace  == (ch)) || \
                             (kCR_PdfWhiteSpace  == (ch)) || \
                             (kSP_PdfWhiteSpace  == (ch)))
63
// isPdfEOL(ch): true when ch is a line feed or a carriage return.
// Function-like macro: ch can be evaluated twice.
#define isPdfEOL(ch) ((kLF_PdfWhiteSpace == (ch)) || (kCR_PdfWhiteSpace == (ch)))
65
66
// isPdfDelimiter(ch): true when ch equals one of the ten delimiter constants:
// ( ) < > [ ] { } / %
// Function-like macro: ch can be evaluated more than once.
#define isPdfDelimiter(ch) ((kOpenedRoundBracket_PdfDelimiter    == (ch)) || \
                            (kClosedRoundBracket_PdfDelimiter    == (ch)) || \
                            (kOpenedInequityBracket_PdfDelimiter == (ch)) || \
                            (kClosedInequityBracket_PdfDelimiter == (ch)) || \
                            (kOpenedSquareBracket_PdfDelimiter   == (ch)) || \
                            (kClosedSquareBracket_PdfDelimiter   == (ch)) || \
                            (kOpenedCurlyBracket_PdfDelimiter    == (ch)) || \
                            (kClosedCurlyBracket_PdfDelimiter    == (ch)) || \
                            (kNamed_PdfDelimiter                 == (ch)) || \
                            (kComment_PdfDelimiter               == (ch)))
77
// isPdfWhiteSpaceOrPdfDelimiter(ch): true when either isPdfWhiteSpace(ch) or
// isPdfDelimiter(ch) holds; the white-space test is tried first.
#define isPdfWhiteSpaceOrPdfDelimiter(ch) \
    (isPdfWhiteSpace(ch) || isPdfDelimiter(ch))
79
// isPdfDigit(ch): true when ch lies in the range '0'..'9' inclusive.
// Function-like macro: ch can be evaluated twice.
#define isPdfDigit(ch) (('0' <= (ch)) && ((ch) <= '9'))
// isPdfNumeric(ch): true when ch is a digit (per isPdfDigit) or one of '+', '-', '.'.
// Function-like macro: ch can be evaluated more than once.
#define isPdfNumeric(ch) \
    (isPdfDigit(ch) || ('+' == (ch)) || ('-' == (ch)) || ('.' == (ch)))
82
// Scanning helpers, declared here and defined out of line. Each takes a byte range
// given as two pointers and returns a pointer to const bytes.
// NOTE(review): judging by the names, skipPdfWhiteSpaces presumably returns the first
// position in [buffer, end) that is not PDF white space, and endOfPdfToken the position
// just past the token beginning at start - confirm against the definitions.
const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer,
                                        const unsigned char* end);
const unsigned char* endOfPdfToken(const unsigned char* start,
                                   const unsigned char* end);
85
// Size constant (1024). Nothing in this header refers to it.
// NOTE(review): presumably a byte count used by the implementation file - confirm.
#define BUFFER_SIZE 1024
87
88/** \class SkPdfAllocator
89 *
90 *   An allocator only allocates memory, and it deletes it all when the allocator is destroyed.
91 *   This strategy would allow us not to do any garbage collection while we parse and/or render
92 *   a pdf.
93 *
94 */
95class SkPdfAllocator {
96public:
97    SkPdfAllocator() {
98        fSizeInBytes = sizeof(*this);
99        fCurrent = allocBlock();
100        fCurrentUsed = 0;
101    }
102
103    ~SkPdfAllocator();
104
105    // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called.
106    SkPdfNativeObject* allocObject();
107
108    // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called.
109    void* alloc(size_t bytes) {
110        void* data = malloc(bytes);
111        fHandles.push(data);
112        fSizeInBytes += bytes;
113        return data;
114    }
115
116    // Returns the number of bytes used in this allocator.
117    size_t bytesUsed() const {
118        return fSizeInBytes;
119    }
120
121private:
122    SkTDArray<SkPdfNativeObject*> fHistory;
123    SkTDArray<void*> fHandles;
124    SkPdfNativeObject* fCurrent;
125    int fCurrentUsed;
126
127    SkPdfNativeObject* allocBlock();
128    size_t fSizeInBytes;
129};
130
// Kind of a parsed token: either a keyword or an object.
enum SkPdfTokenType {
    kKeyword_TokenType = 0,
    kObject_TokenType  = 1
};
136
137
138/** \struct PdfToken
139 *
140 *   Stores the result of the parsing - a keyword or an object.
141 *
142 */
143struct PdfToken {
144    const char*             fKeyword;
145    size_t                  fKeywordLength;
146    SkPdfNativeObject*      fObject;
147    SkPdfTokenType          fType;
148
149    PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {}
150};
151
152/** \class SkPdfNativeTokenizer
153 *
154 *   Responsible to tokenize a stream in small tokens, eityh a keyword or an object.
155 *   A renderer can feed on the tokens and render a pdf.
156 *
157 */
158class SkPdfNativeTokenizer {
159public:
160    SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
161                         SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
162    SkPdfNativeTokenizer(const unsigned char* buffer, int len,
163                         SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
164
165    virtual ~SkPdfNativeTokenizer();
166
167    // Reads one token. Returns false if there are no more tokens.
168    // If writeDiff is true, and a token was read, create a PNG highlighting
169    // the difference caused by this command in /tmp/log_step_by_step.
170    // If PDF_TRACE_DIFF_IN_PNG is not defined, writeDiff does nothing.
171    bool readToken(PdfToken* token, bool writeDiff = false);
172
173    // Put back a token to be read in the nextToken read. Only one token is allowed to be put
174    // back. Must not necesaarely be the last token read.
175    void PutBack(PdfToken token);
176
177    // Reads the inline image that is present in the stream. At this point we just consumed the ID
178    // token already.
179    SkPdfImageDictionary* readInlineImage();
180
181private:
182    bool readTokenCore(PdfToken* token);
183
184    SkPdfNativeDoc* fDoc;
185    SkPdfAllocator* fAllocator;
186
187    const unsigned char* fUncompressedStreamStart;
188    const unsigned char* fUncompressedStream;
189    const unsigned char* fUncompressedStreamEnd;
190
191    bool fEmpty;
192    bool fHasPutBack;
193    PdfToken fPutBack;
194};
195
196const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
197                                SkPdfNativeObject* token,
198                                SkPdfAllocator* allocator,
199                                SkPdfNativeDoc* doc);
200
201#endif  // SkPdfNativeTokenizer_DEFINED
202