1/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkPdfNativeDoc.h"
9
10#include <stdio.h>
11#include <string.h>
12#include <sys/types.h>
13#include <sys/stat.h>
14
15#include "SkPdfMapper_autogen.h"
16#include "SkPdfNativeObject.h"
17#include "SkPdfNativeTokenizer.h"
18#include "SkPdfReporter.h"
19#include "SkStream.h"
20
21// TODO(edisonn): for some reason on mac these files are found here, but are found from headers
22//#include "SkPdfFileTrailerDictionary_autogen.h"
23//#include "SkPdfCatalogDictionary_autogen.h"
24//#include "SkPdfPageObjectDictionary_autogen.h"
25//#include "SkPdfPageTreeNodeDictionary_autogen.h"
26#include "SkPdfHeaders_autogen.h"
27
// Returns the size in bytes that stat() reports for |filename|, or -1 when stat() fails
// (for example because the path does not exist).
static long getFileSize(const char* filename)
{
    struct stat info;
    if (stat(filename, &info) != 0) {
        return -1;
    }
    return (long)info.st_size;
}
34
35static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) {
36    while (current > start && !isPdfEOL(*(current - 1))) {
37        current--;
38    }
39    return current;
40}
41
42static const unsigned char* previousLineHome(const unsigned char* start,
43                                             const unsigned char* current) {
44    if (current > start && isPdfEOL(*(current - 1))) {
45        current--;
46    }
47
48    // allows CR+LF, LF+CR but not two CR+CR or LF+LF
49    if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) {
50        current--;
51    }
52
53    while (current > start && !isPdfEOL(*(current - 1))) {
54        current--;
55    }
56
57    return current;
58}
59
60static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) {
61    while (current < end && !isPdfEOL(*current)) {
62        current++;
63    }
64    current++;
65    if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
66        current++;
67    }
68    return current;
69}
70
71SkPdfNativeDoc* gDoc = NULL;
72
73SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream)
74        : fAllocator(new SkPdfAllocator())
75        , fFileContent(NULL)
76        , fContentLength(0)
77        , fRootCatalogRef(NULL)
78        , fRootCatalog(NULL) {
79    size_t size = stream->getLength();
80    void* ptr = sk_malloc_throw(size);
81    stream->read(ptr, size);
82
83    init(ptr, size);
84}
85
86SkPdfNativeDoc::SkPdfNativeDoc(const char* path)
87        : fAllocator(new SkPdfAllocator())
88        , fFileContent(NULL)
89        , fContentLength(0)
90        , fRootCatalogRef(NULL)
91        , fRootCatalog(NULL) {
92    gDoc = this;
93    FILE* file = fopen(path, "r");
94    // TODO(edisonn): put this in a function that can return NULL
95    if (file) {
96        size_t size = getFileSize(path);
97        void* content = sk_malloc_throw(size);
98        bool ok = (0 != fread(content, size, 1, file));
99        fclose(file);
100        if (!ok) {
101            sk_free(content);
102            SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
103                        "could not read file", NULL, NULL);
104            // TODO(edisonn): not nice to return like this from constructor, create a static
105            // function that can report NULL for failures.
106            return;  // Doc will have 0 pages
107        }
108
109        init(content, size);
110    }
111}
112
113void SkPdfNativeDoc::init(const void* bytes, size_t length) {
114    fFileContent = (const unsigned char*)bytes;
115    fContentLength = length;
116    const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
117    const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
118    const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);
119
120    if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
121        SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
122                    "Could not find startxref", NULL, NULL);
123    }
124
125    long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
126
127    bool storeCatalog = true;
128    while (xrefByteOffset >= 0) {
129        const unsigned char* trailerStart = this->readCrossReferenceSection(fFileContent + xrefByteOffset,
130                                                                            xrefstartKeywordLine);
131        xrefByteOffset = -1;
132        if (trailerStart < xrefstartKeywordLine) {
133            this->readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false);
134            storeCatalog = false;
135        }
136    }
137
138    // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
139    // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
140
141    if (fRootCatalogRef) {
142        fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
143        if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
144            SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
145            if (tree && tree->isDictionary() && tree->valid()) {
146                fillPages(tree);
147            }
148        }
149    }
150
151    if (pages() == 0) {
152        // TODO(edisonn): probably it would be better to return NULL and make a clean document.
153        loadWithoutXRef();
154    }
155
156    // TODO(edisonn): corrupted pdf, read it from beginning and rebuild
157    // (xref, trailer, or just read all objects)
158}
159
160void SkPdfNativeDoc::loadWithoutXRef() {
161    const unsigned char* current = fFileContent;
162    const unsigned char* end = fFileContent + fContentLength;
163
164    // TODO(edisonn): read pdf version
165    current = ignoreLine(current, end);
166
167    current = skipPdfWhiteSpaces(current, end);
168    while (current < end) {
169        SkPdfNativeObject token;
170        current = nextObject(current, end, &token, NULL, NULL);
171        if (token.isInteger()) {
172            int id = (int)token.intValue();
173
174            token.reset();
175            current = nextObject(current, end, &token, NULL, NULL);
176            // TODO(edisonn): generation ignored for now (used in pdfs with updates)
177            // int generation = (int)token.intValue();
178
179            token.reset();
180            current = nextObject(current, end, &token, NULL, NULL);
181            // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring.
182            if (!token.isKeyword("obj")) {
183                SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
184                            "Could not find obj", NULL, NULL);
185                continue;
186            }
187
188            while (fObjects.count() < id + 1) {
189                reset(fObjects.append());
190            }
191
192            fObjects[id].fOffset = current - fFileContent;
193
194            SkPdfNativeObject* obj = fAllocator->allocObject();
195            current = nextObject(current, end, obj, fAllocator, this);
196
197            fObjects[id].fResolvedReference = obj;
198            fObjects[id].fObj = obj;
199            fObjects[id].fIsReferenceResolved = true;
200        } else if (token.isKeyword("trailer")) {
201            long dummy;
202            current = readTrailer(current, end, true, &dummy, true);
203        } else if (token.isKeyword("startxref")) {
204            token.reset();
205            current = nextObject(current, end, &token, NULL, NULL);  // ignore startxref
206        }
207
208        current = skipPdfWhiteSpaces(current, end);
209    }
210
211    // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we
212    // might not need it.
213    if (!fRootCatalogRef) {
214        for (unsigned int i = 0 ; i < objects(); i++) {
215            SkPdfNativeObject* obj = object(i);
216            SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL;
217            if (root && root->isReference()) {
218                fRootCatalogRef = root;
219            }
220        }
221    }
222
223    if (fRootCatalogRef) {
224        fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
225        if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
226            SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
227            if (tree && tree->isDictionary() && tree->valid()) {
228                fillPages(tree);
229            }
230        }
231    }
232
233
234}
235
236SkPdfNativeDoc::~SkPdfNativeDoc() {
237    sk_free((void*)fFileContent);
238    delete fAllocator;
239}
240
241const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart,
242                                                               const unsigned char* trailerEnd) {
243    SkPdfNativeObject xref;
244    const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL);
245
246    if (!xref.isKeyword("xref")) {
247        SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref",
248                    NULL, NULL);
249        return trailerEnd;
250    }
251
252    SkPdfNativeObject token;
253    while (current < trailerEnd) {
254        token.reset();
255        const unsigned char* previous = current;
256        current = nextObject(current, trailerEnd, &token, NULL, NULL);
257        if (!token.isInteger()) {
258            SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
259                        "Done readCrossReferenceSection", NULL, NULL);
260            return previous;
261        }
262
263        int startId = (int)token.intValue();
264        token.reset();
265        current = nextObject(current, trailerEnd, &token, NULL, NULL);
266
267        if (!token.isInteger()) {
268            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection",
269                                      &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
270            return current;
271        }
272
273        int entries = (int)token.intValue();
274
275        for (int i = 0; i < entries; i++) {
276            token.reset();
277            current = nextObject(current, trailerEnd, &token, NULL, NULL);
278            if (!token.isInteger()) {
279                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
280                                          "readCrossReferenceSection",
281                                          &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
282                return current;
283            }
284            int offset = (int)token.intValue();
285
286            token.reset();
287            current = nextObject(current, trailerEnd, &token, NULL, NULL);
288            if (!token.isInteger()) {
289                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
290                                          "readCrossReferenceSection",
291                                          &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
292                return current;
293            }
294            int generation = (int)token.intValue();
295
296            token.reset();
297            current = nextObject(current, trailerEnd, &token, NULL, NULL);
298            if (!token.isKeyword() || token.lenstr() != 1 ||
299                (*token.c_str() != 'f' && *token.c_str() != 'n')) {
300                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
301                                          "readCrossReferenceSection: f or n expected",
302                                          &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
303                return current;
304            }
305
306            this->addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
307        }
308    }
309    SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
310                "Unexpected end of readCrossReferenceSection", NULL, NULL);
311    return current;
312}
313
314const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart,
315                                                 const unsigned char* trailerEnd,
316                                                 bool storeCatalog, long* prev, bool skipKeyword) {
317    *prev = -1;
318
319    const unsigned char* current = trailerStart;
320    if (!skipKeyword) {
321        SkPdfNativeObject trailerKeyword;
322        // Use null allocator, and let it just fail if memory, it should not crash.
323        current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL);
324
325        if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
326            strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
327            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
328                                      "readTrailer: trailer keyword expected",
329                                      &trailerKeyword,
330                                      SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
331            return current;
332        }
333    }
334
335    SkPdfNativeObject token;
336    current = nextObject(current, trailerEnd, &token, fAllocator, NULL);
337    if (!token.isDictionary()) {
338        return current;
339    }
340    SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
341    if (!trailer->valid()) {
342        return current;
343    }
344
345    if (storeCatalog) {
346        SkPdfNativeObject* ref = trailer->Root(NULL);
347        if (ref == NULL || !ref->isReference()) {
348            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
349                                      "readTrailer: unexpected root reference",
350                                      ref, SkPdfNativeObject::kReference_PdfObjectType, NULL);
351            return current;
352        }
353        fRootCatalogRef = ref;
354    }
355
356    if (trailer->has_Prev()) {
357        *prev = (long)trailer->Prev(NULL);
358    }
359
360    return current;
361}
362
363void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
364    // TODO(edisonn): security here, verify id
365    while (fObjects.count() < id + 1) {
366        this->reset(fObjects.append());
367    }
368
369    fObjects[id].fOffset = offset;
370    fObjects[id].fObj = NULL;
371    fObjects[id].fResolvedReference = NULL;
372    fObjects[id].fIsReferenceResolved = false;
373}
374
375SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) {
376    long startOffset = fObjects[id].fOffset;
377    //long endOffset = fObjects[id].fOffsetEnd;
378    // TODO(edisonn): use hinted endOffset
379    const unsigned char* current = fFileContent + startOffset;
380    const unsigned char* end = fFileContent + fContentLength;
381
382    SkPdfNativeTokenizer tokenizer(current, (int) (end - current), fAllocator, this);
383
384    SkPdfNativeObject idObj;
385    SkPdfNativeObject generationObj;
386    SkPdfNativeObject objKeyword;
387    SkPdfNativeObject* dict = fAllocator->allocObject();
388
389    current = nextObject(current, end, &idObj, NULL, NULL);
390    if (current >= end) {
391        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id",
392                    NULL, NULL);
393        return NULL;
394    }
395
396    current = nextObject(current, end, &generationObj, NULL, NULL);
397    if (current >= end) {
398        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
399                    "reading generation", NULL, NULL);
400        return NULL;
401    }
402
403    current = nextObject(current, end, &objKeyword, NULL, NULL);
404    if (current >= end) {
405        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
406                    "reading keyword obj", NULL, NULL);
407        return NULL;
408    }
409
410    if (!idObj.isInteger() || id != idObj.intValue()) {
411        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id",
412                                  &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
413    }
414
415    // TODO(edisonn): verify that the generation is the right one
416    if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) {
417        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
418                                  "readObject: unexpected generation",
419                                  &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
420    }
421
422    if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
423        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
424                                  "readObject: unexpected obj keyword",
425                                  &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
426    }
427
428    current = nextObject(current, end, dict, fAllocator, this);
429
430    // TODO(edisonn): report warning/error - verify that the last token is endobj
431
432    return dict;
433}
434
435void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) {
436    SkPdfArray* kids = tree->Kids(this);
437    if (kids == NULL) {
438        *fPages.append() = (SkPdfPageObjectDictionary*)tree;
439        return;
440    }
441
442    int cnt = (int) kids->size();
443    for (int i = 0; i < cnt; i++) {
444        SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i));
445        if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) {
446            *fPages.append() = (SkPdfPageObjectDictionary*)obj;
447        } else {
448            // TODO(edisonn): verify that it is a page tree indeed
449            fillPages((SkPdfPageTreeNodeDictionary*)obj);
450        }
451    }
452}
453
454int SkPdfNativeDoc::pages() const {
455    return fPages.count();
456}
457
458SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) {
459    SkASSERT(page >= 0 && page < fPages.count());
460    return fPages[page];
461}
462
463
464SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) {
465    SkASSERT(page >= 0 && page < fPages.count());
466    return fPages[page]->Resources(this);
467}
468
469// TODO(edisonn): Partial implemented.
470// Move the logics directly in the code generator for inheritable and default values?
471SkRect SkPdfNativeDoc::MediaBox(int page) {
472    SkPdfPageObjectDictionary* current = fPages[page];
473    while (!current->has_MediaBox() && current->has_Parent()) {
474        current = (SkPdfPageObjectDictionary*)current->Parent(this);
475    }
476    if (current) {
477        return current->MediaBox(this);
478    }
479    return SkRect::MakeEmpty();
480}
481
482size_t SkPdfNativeDoc::objects() const {
483    return fObjects.count();
484}
485
486SkPdfNativeObject* SkPdfNativeDoc::object(int i) {
487    SkASSERT(!(i < 0 || i > fObjects.count()));
488
489    if (i < 0 || i > fObjects.count()) {
490        return NULL;
491    }
492
493    if (fObjects[i].fObj == NULL) {
494        fObjects[i].fObj = readObject(i);
495        // TODO(edisonn): For perf, when we read the cross reference sections, we should take
496        // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad
497        // stream, and fail quickly, in case we default to sequential stream read.
498    }
499
500    return fObjects[i].fObj;
501}
502
503const SkPdfMapper* SkPdfNativeDoc::mapper() const {
504    return fMapper;
505}
506
507SkPdfReal* SkPdfNativeDoc::createReal(double value) const {
508    SkPdfNativeObject* obj = fAllocator->allocObject();
509    SkPdfNativeObject::makeReal(value, obj);
510    TRACK_OBJECT_SRC(obj);
511    return (SkPdfReal*)obj;
512}
513
514SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const {
515    SkPdfNativeObject* obj = fAllocator->allocObject();
516    SkPdfNativeObject::makeInteger(value, obj);
517    TRACK_OBJECT_SRC(obj);
518    return (SkPdfInteger*)obj;
519}
520
521SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const {
522    SkPdfNativeObject* obj = fAllocator->allocObject();
523    SkPdfNativeObject::makeString(sz, len, obj);
524    TRACK_OBJECT_SRC(obj);
525    return (SkPdfString*)obj;
526}
527
528SkPdfAllocator* SkPdfNativeDoc::allocator() const {
529    return fAllocator;
530}
531
532SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) {
533    if (ref && ref->isReference()) {
534        int id = ref->referenceId();
535        // TODO(edisonn): generation/updates not supported now
536        //int gen = ref->referenceGeneration();
537
538        // TODO(edisonn): verify id and gen expected
539        if (id < 0 || id >= fObjects.count()) {
540            SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
541                        "resolve reference id out of bounds", NULL, NULL);
542            return NULL;
543        }
544
545        if (fObjects[id].fIsReferenceResolved) {
546            SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity,
547                          kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL);
548            return fObjects[id].fResolvedReference;
549        }
550
551        // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow
552        // here unless we check for resolved reference on next line.
553        // Determine if the pdf is corrupted, or we have a bug here.
554
555        // Avoids recursive calls
556        fObjects[id].fIsReferenceResolved = true;
557
558        if (fObjects[id].fObj == NULL) {
559            fObjects[id].fObj = readObject(id);
560        }
561
562        if (fObjects[id].fObj != NULL && fObjects[id].fResolvedReference == NULL) {
563            if (!fObjects[id].fObj->isReference()) {
564                fObjects[id].fResolvedReference = fObjects[id].fObj;
565            } else {
566                fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj);
567            }
568        }
569
570        return fObjects[id].fResolvedReference;
571    }
572
573    return (SkPdfNativeObject*)ref;
574}
575
576size_t SkPdfNativeDoc::bytesUsed() const {
577    return fAllocator->bytesUsed() +
578           fContentLength +
579           fObjects.count() * sizeof(PublicObjectEntry) +
580           fPages.count() * sizeof(SkPdfPageObjectDictionary*) +
581           sizeof(*this);
582}
583