1/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkPdfNativeDoc_DEFINED
9#define SkPdfNativeDoc_DEFINED
10
11#include "SkRect.h"
12#include "SkTDArray.h"
13
// Forward declarations: this header only names these types, so their full definitions are not
// pulled in here.

// General Skia types.
class SkCanvas;
class SkStream;

// Pdf parsing and memory-management helpers.
class SkPdfAllocator;
class SkPdfMapper;
class SkPdfNativeTokenizer;

// Generic pdf object, and the value types handed out by createReal(), createInteger() and
// createString().
class SkPdfNativeObject;
class SkPdfInteger;
class SkPdfReal;
class SkPdfString;

// Typed pdf dictionaries.
class SkPdfCatalogDictionary;
class SkPdfPageObjectDictionary;
class SkPdfPageTreeNodeDictionary;
class SkPdfResourceDictionary;
30
// TODO(edisonn): Implement a smart stream that can seek, and that can also fall back to reading
// the bytes in order. For example, we can try to read the stream optimistically, but if there
// are issues in the pdf, we must read the pdf from the beginning and fix whatever errors we can.
// This would be useful to quickly show page 100 of a pdf (www.example.com/foo.pdf#page100).
// But if the pdf is missing the xref, we will have to read most of the pdf to be able to render
// page 100.
37
38/** \class SkPdfNativeDoc
39 *
40 *  The SkPdfNativeDoc class is used to load a PDF in memory and it represents a PDF Document.
41 *
42 */
43class SkPdfNativeDoc {
44private:
45    // Information about public objects in pdf that can be referenced with ID GEN R
46    struct PublicObjectEntry {
47        // Offset in the file where the object starts.
48        long fOffset;
49
50        // Offset in file where the object ends. Could be used to quickly fail if there is a
51        // problem in pdf structure.
52        // long endOffset;  // TODO(edisonn): determine the end of the object,
53                            // to be used when the doc is corrupted, for fast failure.
54
55        // Refered object.
56        SkPdfNativeObject* fObj;
57
58        // If refered object is a reference, we resolve recursively the reference until we find
59        // the real object.
60        SkPdfNativeObject* fResolvedReference;
61
62        // Used to break a recursive reference to itself.
63        bool fIsReferenceResolved;
64    };
65
66public:
67    // TODO(edisonn) should be deprecated
68    SkPdfNativeDoc(const char* path);
69
70    // TODO(edisonn) should be deprecated
71    // FIXME: Untested.
72    SkPdfNativeDoc(SkStream* stream);
73
74    ~SkPdfNativeDoc();
75
76    // returns the number of pages in the pdf
77    int pages() const;
78
79    // returns the page resources
80    SkPdfResourceDictionary* pageResources(int page);
81
82    // returns the page's mediabox i points - the page physical boundaries.
83    SkRect MediaBox(int page);
84
85    //returns objects that are references and can be queried.
86    size_t objects() const;
87
88    // returns an object.
89    // TODO(edisonn): pdf updates are not supported yet.
90    //                add generation parameter to support page updates.
91    SkPdfNativeObject* object(int id /*, int generation*/ );
92
93    // returns the object that holds all the page informnation
94    // TODO(edisonn): pdf updates are not supported yet.
95    //                add generation parameter to support page updates.
96    SkPdfPageObjectDictionary* page(int page/*, int generation*/);
97
98    // TODO(edisonn): deprecate the mapper - was used when we supported multiple
99    // parsers (podofo)
100    // The mapper maps allows an object to be mapped to a different dictionary type
101    // and it could verify the integrity of the object.
102    const SkPdfMapper* mapper() const;
103
104    // Allocator of the pdf - this holds all objects that are publicly referenced
105    // and all the objects that they refer
106    SkPdfAllocator* allocator() const;
107
108    // Allows a renderer to create values to be dumped on the stack for operators to process them.
109    SkPdfReal* createReal(double value) const;
110    SkPdfInteger* createInteger(int value) const;
111    // the string does not own the char*
112    SkPdfString* createString(const unsigned char* sz, size_t len) const;
113
114    // Resolve a reference object. Will recursively resolve the reference
115    // until a real object is found
116    SkPdfNativeObject* resolveReference(SkPdfNativeObject* ref);
117
118    // Reports an approximation of all the memory usage.
119    size_t bytesUsed() const;
120
121private:
122
123    // Takes ownership of bytes.
124    void init(const void* bytes, size_t length);
125
126    // loads a pdf that has missing xref
127    void loadWithoutXRef();
128
129    const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart,
130                                                   const unsigned char* trailerEnd);
131    const unsigned char* readTrailer(const unsigned char* trailerStart,
132                                     const unsigned char* trailerEnd,
133                                     bool storeCatalog, long* prev, bool skipKeyword);
134
135    // TODO(edisonn): pdfs with updates not supported right now, generation ignored.
136    void addCrossSectionInfo(int id, int generation, int offset, bool isFreed);
137    static void reset(PublicObjectEntry* obj) {
138        obj->fObj = NULL;
139        obj->fResolvedReference = NULL;
140        obj->fOffset = -1;
141        obj->fIsReferenceResolved = false;
142    }
143
144    SkPdfNativeObject* readObject(int id/*, int generation*/);
145
146    void fillPages(SkPdfPageTreeNodeDictionary* tree);
147
148    SkPdfAllocator* fAllocator;
149    SkPdfMapper* fMapper;
150    const unsigned char* fFileContent;
151    size_t fContentLength;
152    SkPdfNativeObject* fRootCatalogRef;
153    SkPdfCatalogDictionary* fRootCatalog;
154
155    mutable SkTDArray<PublicObjectEntry> fObjects;
156    SkTDArray<SkPdfPageObjectDictionary*> fPages;
157};
158
159#endif  // SkPdfNativeDoc_DEFINED
160