1// Copyright 2016 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
8#define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
9
10#include <map>
11#include <memory>
12#include <set>
13#include <utility>
14#include <vector>
15
16#include "core/fpdfapi/parser/cpdf_parser.h"
17#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
18#include "core/fxcrt/unowned_ptr.h"
19
20class CPDF_CrossRefAvail;
21class CPDF_Dictionary;
22class CPDF_HintTables;
23class CPDF_IndirectObjectHolder;
24class CPDF_LinearizedHeader;
25class CPDF_PageObjectAvail;
26class CPDF_Parser;
27class CPDF_ReadValidator;
28
// States stored in CPDF_DataAvail::m_docStatus, which starts out as
// PDF_DATAAVAIL_HEADER. The numeric values are written out explicitly; they
// are exactly the values the enumerators receive from implicit numbering in
// this declaration order.
enum PDF_DATAAVAIL_STATUS {
  PDF_DATAAVAIL_HEADER = 0,
  PDF_DATAAVAIL_FIRSTPAGE = 1,
  PDF_DATAAVAIL_HINTTABLE = 2,
  PDF_DATAAVAIL_LOADALLCROSSREF = 3,
  PDF_DATAAVAIL_ROOT = 4,
  PDF_DATAAVAIL_INFO = 5,
  PDF_DATAAVAIL_PAGETREE = 6,
  PDF_DATAAVAIL_PAGE = 7,
  PDF_DATAAVAIL_PAGE_LATERLOAD = 8,
  PDF_DATAAVAIL_RESOURCES = 9,
  PDF_DATAAVAIL_DONE = 10,
  PDF_DATAAVAIL_ERROR = 11,
  PDF_DATAAVAIL_LOADALLFILE = 12,
};
44
// Kind of a CPDF_DataAvail::PageNode, stored in PageNode::m_type. Values are
// spelled out explicitly and equal the implicit numbering of this
// declaration order.
enum PDF_PAGENODE_TYPE {
  PDF_PAGENODE_UNKNOWN = 0,
  PDF_PAGENODE_PAGE = 1,
  PDF_PAGENODE_PAGES = 2,
  PDF_PAGENODE_ARRAY = 3,
};
51
52class CPDF_DataAvail final {
53 public:
54  // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot
55  // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
56  // to make sure the two sets of values match.
57  enum DocAvailStatus {
58    DataError = -1,        // PDF_DATA_ERROR
59    DataNotAvailable = 0,  // PDF_DATA_NOTAVAIL
60    DataAvailable = 1,     // PDF_DATA_AVAIL
61  };
62
63  // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot
64  // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
65  // to make sure the two sets of values match.
66  enum DocLinearizationStatus {
67    LinearizationUnknown = -1,  // PDF_LINEARIZATION_UNKNOWN
68    NotLinearized = 0,          // PDF_NOT_LINEARIZED
69    Linearized = 1,             // PDF_LINEARIZED
70  };
71
72  // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot
73  // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
74  // to make sure the two sets of values match.
75  enum DocFormStatus {
76    FormError = -1,        // PDF_FORM_ERROR
77    FormNotAvailable = 0,  // PDF_FORM_NOTAVAIL
78    FormAvailable = 1,     // PDF_FORM_AVAIL
79    FormNotExist = 2,      // PDF_FORM_NOTEXIST
80  };
81
82  class FileAvail {
83   public:
84    virtual ~FileAvail();
85    virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0;
86  };
87
88  class DownloadHints {
89   public:
90    virtual ~DownloadHints();
91    virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0;
92  };
93
94  CPDF_DataAvail(FileAvail* pFileAvail,
95                 const RetainPtr<IFX_SeekableReadStream>& pFileRead,
96                 bool bSupportHintTable);
97  ~CPDF_DataAvail();
98
99  DocAvailStatus IsDocAvail(DownloadHints* pHints);
100  DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints);
101  DocFormStatus IsFormAvail(DownloadHints* pHints);
102  DocLinearizationStatus IsLinearizedPDF();
103  RetainPtr<IFX_SeekableReadStream> GetFileRead() const;
104  int GetPageCount() const;
105  CPDF_Dictionary* GetPage(int index);
106  RetainPtr<CPDF_ReadValidator> GetValidator() const;
107
108  std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument(
109      const char* password);
110
111  const CPDF_HintTables* GetHintTables() const { return m_pHintTables.get(); }
112
113 protected:
114  class PageNode {
115   public:
116    PageNode();
117    ~PageNode();
118
119    PDF_PAGENODE_TYPE m_type;
120    uint32_t m_dwPageNo;
121    std::vector<std::unique_ptr<PageNode>> m_ChildNodes;
122  };
123
124  static const int kMaxPageRecursionDepth = 1024;
125
126  bool CheckDocStatus();
127  bool CheckHeader();
128  bool CheckFirstPage();
129  bool CheckHintTables();
130  bool CheckRoot();
131  bool CheckInfo();
132  bool CheckPages();
133  bool CheckPage();
134  DocAvailStatus CheckResources(const CPDF_Dictionary* page);
135  DocFormStatus CheckAcroForm();
136  bool CheckPageStatus();
137
138  DocAvailStatus CheckHeaderAndLinearized();
139  std::unique_ptr<CPDF_Object> ParseIndirectObjectAt(
140      FX_FILESIZE pos,
141      uint32_t objnum,
142      CPDF_IndirectObjectHolder* pObjList = nullptr);
143  std::unique_ptr<CPDF_Object> GetObject(uint32_t objnum,
144                                         bool* pExistInFile);
145  bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages);
146  bool PreparePageItem();
147  bool LoadPages();
148  bool CheckAndLoadAllXref();
149  bool LoadAllFile();
150  DocAvailStatus CheckLinearizedData();
151
152  bool CheckPage(uint32_t dwPage);
153  bool LoadDocPages();
154  bool LoadDocPage(uint32_t dwPage);
155  bool CheckPageNode(const PageNode& pageNode,
156                     int32_t iPage,
157                     int32_t& iCount,
158                     int level);
159  bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode);
160  bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode);
161  bool CheckPageCount();
162  bool IsFirstCheck(uint32_t dwPage);
163  void ResetFirstCheck(uint32_t dwPage);
164  bool ValidatePage(uint32_t dwPage);
165  CPDF_SyntaxParser* GetSyntaxParser() const;
166
167  FileAvail* const m_pFileAvail;
168  RetainPtr<CPDF_ReadValidator> m_pFileRead;
169  CPDF_Parser m_parser;
170  std::unique_ptr<CPDF_Object> m_pRoot;
171  uint32_t m_dwRootObjNum = 0;
172  uint32_t m_dwInfoObjNum = 0;
173  std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
174  bool m_bDocAvail = false;
175  std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail;
176  PDF_DATAAVAIL_STATUS m_docStatus = PDF_DATAAVAIL_HEADER;
177  const FX_FILESIZE m_dwFileLen;
178  CPDF_Document* m_pDocument = nullptr;
179  std::vector<uint32_t> m_PageObjList;
180  uint32_t m_PagesObjNum = 0;
181  bool m_bLinearedDataOK = false;
182  bool m_bMainXRefLoadTried = false;
183  bool m_bMainXRefLoadedOK = false;
184  bool m_bPagesTreeLoad = false;
185  bool m_bPagesLoad = false;
186  CPDF_Parser* m_pCurrentParser = nullptr;
187  std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail;
188  std::vector<std::unique_ptr<CPDF_Object>> m_PagesArray;
189  uint32_t m_dwEncryptObjNum = 0;
190  bool m_bTotalLoadPageTree = false;
191  bool m_bCurPageDictLoadOK = false;
192  PageNode m_PageNode;
193  std::set<uint32_t> m_pageMapCheckState;
194  std::set<uint32_t> m_pagesLoadState;
195  std::set<uint32_t> m_SeenPrevPositions;
196  std::unique_ptr<CPDF_HintTables> m_pHintTables;
197  const bool m_bSupportHintTable;
198  std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail;
199  std::map<const CPDF_Object*, std::unique_ptr<CPDF_PageObjectAvail>>
200      m_PagesResourcesAvail;
201  bool m_bHeaderAvail = false;
202};
203
204#endif  // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
205