1// Copyright 2015 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <algorithm>
6#include <memory>
7#include <set>
8#include <string>
9#include <utility>
10#include <vector>
11
12#include "public/fpdfview.h"
13#include "testing/embedder_test.h"
14#include "testing/gtest/include/gtest/gtest.h"
15#include "testing/test_support.h"
16#include "testing/utils/path_service.h"
17
18namespace {
19class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
20 public:
21  explicit TestAsyncLoader(const std::string& file_name) {
22    std::string file_path;
23    if (!PathService::GetTestFilePath(file_name, &file_path))
24      return;
25    file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
26    if (!file_contents_)
27      return;
28
29    file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
30    file_access_.m_GetBlock = SGetBlock;
31    file_access_.m_Param = this;
32
33    FX_DOWNLOADHINTS::version = 1;
34    FX_DOWNLOADHINTS::AddSegment = SAddSegment;
35
36    FX_FILEAVAIL::version = 1;
37    FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
38  }
39
40  bool IsOpened() const { return !!file_contents_; }
41
42  FPDF_FILEACCESS* file_access() { return &file_access_; }
43  FX_DOWNLOADHINTS* hints() { return this; }
44  FX_FILEAVAIL* file_avail() { return this; }
45
46  const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
47    return requested_segments_;
48  }
49
50  size_t max_requested_bound() const { return max_requested_bound_; }
51
52  void ClearRequestedSegments() {
53    requested_segments_.clear();
54    max_requested_bound_ = 0;
55  }
56
57  bool is_new_data_available() const { return is_new_data_available_; }
58  void set_is_new_data_available(bool is_new_data_available) {
59    is_new_data_available_ = is_new_data_available;
60  }
61
62  size_t max_already_available_bound() const {
63    return available_ranges_.empty() ? 0 : available_ranges_.rbegin()->second;
64  }
65
66 private:
67  void SetDataAvailable(size_t start, size_t size) {
68    if (size == 0)
69      return;
70    const auto range = std::make_pair(start, start + size);
71    if (available_ranges_.empty()) {
72      available_ranges_.insert(range);
73      return;
74    }
75    auto start_it = available_ranges_.upper_bound(range);
76    if (start_it != available_ranges_.begin())
77      --start_it;  // start now points to the key equal or lower than offset.
78    if (start_it->second < range.first)
79      ++start_it;  // start element is entirely before current range, skip it.
80
81    auto end_it = available_ranges_.upper_bound(
82        std::make_pair(range.second, range.second));
83    if (start_it == end_it) {  // No ranges to merge.
84      available_ranges_.insert(range);
85      return;
86    }
87
88    --end_it;
89
90    size_t new_start = std::min<size_t>(start_it->first, range.first);
91    size_t new_end = std::max(end_it->second, range.second);
92
93    available_ranges_.erase(start_it, ++end_it);
94    available_ranges_.insert(std::make_pair(new_start, new_end));
95  }
96
97  bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
98    if (size == 0)
99      return false;
100    const auto range = std::make_pair(start, start + size);
101    auto it = available_ranges_.upper_bound(range);
102    if (it == available_ranges_.begin())
103      return false;  // No ranges includes range.start().
104
105    --it;  // Now it starts equal or before range.start().
106    return it->second >= range.second;
107  }
108
109  int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
110    if (!IsDataAvailImpl(pos, size))
111      return 0;
112    const unsigned long end =
113        std::min(static_cast<unsigned long>(file_length_), pos + size);
114    if (end <= pos)
115      return 0;
116    memcpy(pBuf, file_contents_.get() + pos, end - pos);
117    SetDataAvailable(pos, end - pos);
118    return static_cast<int>(end - pos);
119  }
120
121  void AddSegmentImpl(size_t offset, size_t size) {
122    requested_segments_.push_back(std::make_pair(offset, size));
123    max_requested_bound_ = std::max(max_requested_bound_, offset + size);
124  }
125
126  bool IsDataAvailImpl(size_t offset, size_t size) {
127    if (offset + size > file_length_)
128      return false;
129    if (is_new_data_available_) {
130      SetDataAvailable(offset, size);
131      return true;
132    }
133    return CheckDataAlreadyAvailable(offset, size);
134  }
135
136  static int SGetBlock(void* param,
137                       unsigned long pos,
138                       unsigned char* pBuf,
139                       unsigned long size) {
140    return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
141  }
142
143  static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
144    return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
145  }
146
147  static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
148                                size_t offset,
149                                size_t size) {
150    return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
151  }
152
153  FPDF_FILEACCESS file_access_;
154
155  std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
156  size_t file_length_;
157  std::vector<std::pair<size_t, size_t>> requested_segments_;
158  size_t max_requested_bound_ = 0;
159  bool is_new_data_available_ = true;
160
161  using Range = std::pair<size_t, size_t>;
162  struct range_compare {
163    bool operator()(const Range& lval, const Range& rval) const {
164      return lval.first < rval.first;
165    }
166  };
167  using RangesContainer = std::set<Range, range_compare>;
168  RangesContainer available_ranges_;
169};
170
171}  // namespace
172
173class FPDFDataAvailEmbeddertest : public EmbedderTest {};
174
175TEST_F(FPDFDataAvailEmbeddertest, TrailerUnterminated) {
176  // Document must load without crashing but is too malformed to be available.
177  EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));
178  EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints_));
179}
180
181TEST_F(FPDFDataAvailEmbeddertest, TrailerAsHexstring) {
182  // Document must load without crashing but is too malformed to be available.
183  EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));
184  EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints_));
185}
186
187TEST_F(FPDFDataAvailEmbeddertest, LoadUsingHintTables) {
188  TestAsyncLoader loader("feature_linearized_loading.pdf");
189  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
190  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
191  document_ = FPDFAvail_GetDocument(avail_, nullptr);
192  ASSERT_TRUE(document_);
193  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
194
195  // No new data available, to prevent load "Pages" node.
196  loader.set_is_new_data_available(false);
197  FPDF_PAGE page = LoadPage(1);
198  EXPECT_TRUE(page);
199  UnloadPage(page);
200}
201
202TEST_F(FPDFDataAvailEmbeddertest,
203       DoNotLoadMainCrossRefForFirstPageIfLinearized) {
204  TestAsyncLoader loader("feature_linearized_loading.pdf");
205  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
206  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
207  document_ = FPDFAvail_GetDocument(avail_, nullptr);
208  ASSERT_TRUE(document_);
209  const int first_page_num = FPDFAvail_GetFirstPageNum(document_);
210
211  // The main cross ref table should not be processed.
212  // (It is always at file end)
213  EXPECT_GT(loader.file_access()->m_FileLen,
214            loader.max_already_available_bound());
215
216  // Prevent access to non requested data to coerce the parser to send new
217  // request for non available (non requested before) data.
218  loader.set_is_new_data_available(false);
219  FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints());
220
221  // The main cross ref table should not be requested.
222  // (It is always at file end)
223  EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());
224
225  // Allow parse page.
226  loader.set_is_new_data_available(true);
227  ASSERT_EQ(PDF_DATA_AVAIL,
228            FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()));
229
230  // The main cross ref table should not be processed.
231  // (It is always at file end)
232  EXPECT_GT(loader.file_access()->m_FileLen,
233            loader.max_already_available_bound());
234
235  // Prevent loading data, while page loading.
236  loader.set_is_new_data_available(false);
237  FPDF_PAGE page = LoadPage(first_page_num);
238  EXPECT_TRUE(page);
239  UnloadPage(page);
240}
241