1cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com/*
2cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * Copyright 2013 Google Inc.
3cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com *
4cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * Use of this source code is governed by a BSD-style license that can be
5cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * found in the LICENSE file.
6cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com */
7cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com
8cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#ifndef SkPdfNativeDoc_DEFINED
9cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#define SkPdfNativeDoc_DEFINED
103aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
11571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkRect.h"
12571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "SkTDArray.h"
13571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
14571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkCanvas;
15571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
16571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfAllocator;
17571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfMapper;
183aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeObject;
19571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfReal;
20571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfInteger;
21571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfString;
22571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfResourceDictionary;
23571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfCatalogDictionary;
24571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfPageObjectDictionary;
25571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfPageTreeNodeDictionary;
26571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
27571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass SkPdfNativeTokenizer;
28571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
29147adb10f7f80ae721879e08474fd575e719487cedisonn@google.comclass SkStream;
30147adb10f7f80ae721879e08474fd575e719487cedisonn@google.com
// TODO(edisonn): Implement a smart stream that can seek, and that can also fall back to reading
// the bytes in order. For example, we can try to read the stream optimistically, but if there
// are issues in the pdf, we must read the pdf from the beginning and fix whatever errors we can.
// This would be useful to quickly show page 100 of a pdf (www.example.com/foo.pdf#page100).
// But if the pdf is missing the xref, then we will have to read most of the pdf to be able to
// render page 100.
37c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com
382af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com/** \class SkPdfNativeDoc
392af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com *
402af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com *  The SkPdfNativeDoc class is used to load a PDF in memory and it represents a PDF Document.
412af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com *
422af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com */
433aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.comclass SkPdfNativeDoc {
44571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate:
452af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // Information about public objects in pdf that can be referenced with ID GEN R
46571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    struct PublicObjectEntry {
472af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com        // Offset in the file where the object starts.
48571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        long fOffset;
492af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
502af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com        // Offset in file where the object ends. Could be used to quickly fail if there is a
512af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com        // problem in pdf structure.
52c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com        // long endOffset;  // TODO(edisonn): determine the end of the object,
53c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com                            // to be used when the doc is corrupted, for fast failure.
542af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
552af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com        // Refered object.
563aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com        SkPdfNativeObject* fObj;
572af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
582af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com        // If refered object is a reference, we resolve recursively the reference until we find
592af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com        // the real object.
603aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com        SkPdfNativeObject* fResolvedReference;
612af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
622af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com        // Used to break a recursive reference to itself.
63f68aed33819cbc98a95edeadde1da9303eca7fb2edisonn@google.com        bool fIsReferenceResolved;
64571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    };
653aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
663aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.compublic:
672af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // TODO(edisonn) should be deprecated
683aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeDoc(const char* path);
692af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
702af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // TODO(edisonn) should be deprecated
71909228992c1671ea7451d1c6bc588a8ec991841escroggo@google.com    // FIXME: Untested.
723aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeDoc(SkStream* stream);
73147adb10f7f80ae721879e08474fd575e719487cedisonn@google.com
743aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    ~SkPdfNativeDoc();
75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
762af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // returns the number of pages in the pdf
77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    int pages() const;
782af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
792af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // returns the page resources
80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfResourceDictionary* pageResources(int page);
812af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
822af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // returns the page's mediabox i points - the page physical boundaries.
83951d6532de49003cd5a43f57caf91dd6d3efc33eedisonn@google.com    SkRect MediaBox(int page);
842af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
852af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    //returns objects that are references and can be queried.
86571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    size_t objects() const;
87571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
882af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // returns an object.
892af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // TODO(edisonn): pdf updates are not supported yet.
902af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    //                add generation parameter to support page updates.
912af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    SkPdfNativeObject* object(int id /*, int generation*/ );
922af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
932af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // returns the object that holds all the page informnation
942af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // TODO(edisonn): pdf updates are not supported yet.
952af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    //                add generation parameter to support page updates.
962af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    SkPdfPageObjectDictionary* page(int page/*, int generation*/);
972af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
982af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // TODO(edisonn): deprecate the mapper - was used when we supported multiple
992af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // parsers (podofo)
1002af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // The mapper maps allows an object to be mapped to a different dictionary type
1012af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // and it could verify the integrity of the object.
102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    const SkPdfMapper* mapper() const;
1032af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
1042af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // Allocator of the pdf - this holds all objects that are publicly referenced
1052af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // and all the objects that they refer
106571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator* allocator() const;
107571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1082af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // Allows a renderer to create values to be dumped on the stack for operators to process them.
109571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfReal* createReal(double value) const;
110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfInteger* createInteger(int value) const;
111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    // the string does not own the char*
1122ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com    SkPdfString* createString(const unsigned char* sz, size_t len) const;
113571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1142af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // Resolve a reference object. Will recursively resolve the reference
1152af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // until a real object is found
1163aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* resolveReference(SkPdfNativeObject* ref);
117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
118a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com    // Reports an approximation of all the memory usage.
1197b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com    size_t bytesUsed() const;
120a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com
121571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate:
122571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
123147adb10f7f80ae721879e08474fd575e719487cedisonn@google.com    // Takes ownership of bytes.
124147adb10f7f80ae721879e08474fd575e719487cedisonn@google.com    void init(const void* bytes, size_t length);
1252af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com
1262af2ad9cc0b2c7d911aed2e8d2ac77c0b7d3b5dfedisonn@google.com    // loads a pdf that has missing xref
1274ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.com    void loadWithoutXRef();
128147adb10f7f80ae721879e08474fd575e719487cedisonn@google.com
129c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com    const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart,
130c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com                                                   const unsigned char* trailerEnd);
131c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com    const unsigned char* readTrailer(const unsigned char* trailerStart,
132c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com                                     const unsigned char* trailerEnd,
133c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com                                     bool storeCatalog, long* prev, bool skipKeyword);
134571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
135c8fda9d96be0bd944d37a6e23f7adad5f247c51dedisonn@google.com    // TODO(edisonn): pdfs with updates not supported right now, generation ignored.
136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void addCrossSectionInfo(int id, int generation, int offset, bool isFreed);
137571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    static void reset(PublicObjectEntry* obj) {
138571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        obj->fObj = NULL;
139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        obj->fResolvedReference = NULL;
140571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        obj->fOffset = -1;
141f68aed33819cbc98a95edeadde1da9303eca7fb2edisonn@google.com        obj->fIsReferenceResolved = false;
142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    }
143571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
1443aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* readObject(int id/*, int generation*/);
145571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
146571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    void fillPages(SkPdfPageTreeNodeDictionary* tree);
147571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
148571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfAllocator* fAllocator;
149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfMapper* fMapper;
1502ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com    const unsigned char* fFileContent;
151571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    size_t fContentLength;
1523aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com    SkPdfNativeObject* fRootCatalogRef;
153571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkPdfCatalogDictionary* fRootCatalog;
154571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
155571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    mutable SkTDArray<PublicObjectEntry> fObjects;
156571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    SkTDArray<SkPdfPageObjectDictionary*> fPages;
1573aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com};
1583aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com
159cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com#endif  // SkPdfNativeDoc_DEFINED
160