1// Copyright 2015 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include <algorithm> 6#include <memory> 7#include <set> 8#include <string> 9#include <utility> 10#include <vector> 11 12#include "public/fpdfview.h" 13#include "testing/embedder_test.h" 14#include "testing/gtest/include/gtest/gtest.h" 15#include "testing/test_support.h" 16#include "testing/utils/path_service.h" 17 18namespace { 19class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { 20 public: 21 explicit TestAsyncLoader(const std::string& file_name) { 22 std::string file_path; 23 if (!PathService::GetTestFilePath(file_name, &file_path)) 24 return; 25 file_contents_ = GetFileContents(file_path.c_str(), &file_length_); 26 if (!file_contents_) 27 return; 28 29 file_access_.m_FileLen = static_cast<unsigned long>(file_length_); 30 file_access_.m_GetBlock = SGetBlock; 31 file_access_.m_Param = this; 32 33 FX_DOWNLOADHINTS::version = 1; 34 FX_DOWNLOADHINTS::AddSegment = SAddSegment; 35 36 FX_FILEAVAIL::version = 1; 37 FX_FILEAVAIL::IsDataAvail = SIsDataAvail; 38 } 39 40 bool IsOpened() const { return !!file_contents_; } 41 42 FPDF_FILEACCESS* file_access() { return &file_access_; } 43 FX_DOWNLOADHINTS* hints() { return this; } 44 FX_FILEAVAIL* file_avail() { return this; } 45 46 const std::vector<std::pair<size_t, size_t>>& requested_segments() const { 47 return requested_segments_; 48 } 49 50 size_t max_requested_bound() const { return max_requested_bound_; } 51 52 void ClearRequestedSegments() { 53 requested_segments_.clear(); 54 max_requested_bound_ = 0; 55 } 56 57 bool is_new_data_available() const { return is_new_data_available_; } 58 void set_is_new_data_available(bool is_new_data_available) { 59 is_new_data_available_ = is_new_data_available; 60 } 61 62 size_t max_already_available_bound() const { 63 return available_ranges_.empty() ? 0 : available_ranges_.rbegin()->second; 64 } 65 66 private: 67 void SetDataAvailable(size_t start, size_t size) { 68 if (size == 0) 69 return; 70 const auto range = std::make_pair(start, start + size); 71 if (available_ranges_.empty()) { 72 available_ranges_.insert(range); 73 return; 74 } 75 auto start_it = available_ranges_.upper_bound(range); 76 if (start_it != available_ranges_.begin()) 77 --start_it; // start now points to the key equal or lower than offset. 78 if (start_it->second < range.first) 79 ++start_it; // start element is entirely before current range, skip it. 80 81 auto end_it = available_ranges_.upper_bound( 82 std::make_pair(range.second, range.second)); 83 if (start_it == end_it) { // No ranges to merge. 84 available_ranges_.insert(range); 85 return; 86 } 87 88 --end_it; 89 90 size_t new_start = std::min<size_t>(start_it->first, range.first); 91 size_t new_end = std::max(end_it->second, range.second); 92 93 available_ranges_.erase(start_it, ++end_it); 94 available_ranges_.insert(std::make_pair(new_start, new_end)); 95 } 96 97 bool CheckDataAlreadyAvailable(size_t start, size_t size) const { 98 if (size == 0) 99 return false; 100 const auto range = std::make_pair(start, start + size); 101 auto it = available_ranges_.upper_bound(range); 102 if (it == available_ranges_.begin()) 103 return false; // No ranges includes range.start(). 104 105 --it; // Now it starts equal or before range.start(). 106 return it->second >= range.second; 107 } 108 109 int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) { 110 if (!IsDataAvailImpl(pos, size)) 111 return 0; 112 const unsigned long end = 113 std::min(static_cast<unsigned long>(file_length_), pos + size); 114 if (end <= pos) 115 return 0; 116 memcpy(pBuf, file_contents_.get() + pos, end - pos); 117 SetDataAvailable(pos, end - pos); 118 return static_cast<int>(end - pos); 119 } 120 121 void AddSegmentImpl(size_t offset, size_t size) { 122 requested_segments_.push_back(std::make_pair(offset, size)); 123 max_requested_bound_ = std::max(max_requested_bound_, offset + size); 124 } 125 126 bool IsDataAvailImpl(size_t offset, size_t size) { 127 if (offset + size > file_length_) 128 return false; 129 if (is_new_data_available_) { 130 SetDataAvailable(offset, size); 131 return true; 132 } 133 return CheckDataAlreadyAvailable(offset, size); 134 } 135 136 static int SGetBlock(void* param, 137 unsigned long pos, 138 unsigned char* pBuf, 139 unsigned long size) { 140 return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size); 141 } 142 143 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) { 144 return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size); 145 } 146 147 static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis, 148 size_t offset, 149 size_t size) { 150 return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size); 151 } 152 153 FPDF_FILEACCESS file_access_; 154 155 std::unique_ptr<char, pdfium::FreeDeleter> file_contents_; 156 size_t file_length_; 157 std::vector<std::pair<size_t, size_t>> requested_segments_; 158 size_t max_requested_bound_ = 0; 159 bool is_new_data_available_ = true; 160 161 using Range = std::pair<size_t, size_t>; 162 struct range_compare { 163 bool operator()(const Range& lval, const Range& rval) const { 164 return lval.first < rval.first; 165 } 166 }; 167 using RangesContainer = std::set<Range, range_compare>; 168 RangesContainer available_ranges_; 169}; 170 171} // namespace 172 173class FPDFDataAvailEmbeddertest : public EmbedderTest {}; 174 175TEST_F(FPDFDataAvailEmbeddertest, TrailerUnterminated) { 176 // Document must load without crashing but is too malformed to be available. 177 EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf")); 178 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints_)); 179} 180 181TEST_F(FPDFDataAvailEmbeddertest, TrailerAsHexstring) { 182 // Document must load without crashing but is too malformed to be available. 183 EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf")); 184 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints_)); 185} 186 187TEST_F(FPDFDataAvailEmbeddertest, LoadUsingHintTables) { 188 TestAsyncLoader loader("feature_linearized_loading.pdf"); 189 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 190 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); 191 document_ = FPDFAvail_GetDocument(avail_, nullptr); 192 ASSERT_TRUE(document_); 193 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints())); 194 195 // No new data available, to prevent load "Pages" node. 196 loader.set_is_new_data_available(false); 197 FPDF_PAGE page = LoadPage(1); 198 EXPECT_TRUE(page); 199 UnloadPage(page); 200} 201 202TEST_F(FPDFDataAvailEmbeddertest, 203 DoNotLoadMainCrossRefForFirstPageIfLinearized) { 204 TestAsyncLoader loader("feature_linearized_loading.pdf"); 205 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 206 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); 207 document_ = FPDFAvail_GetDocument(avail_, nullptr); 208 ASSERT_TRUE(document_); 209 const int first_page_num = FPDFAvail_GetFirstPageNum(document_); 210 211 // The main cross ref table should not be processed. 212 // (It is always at file end) 213 EXPECT_GT(loader.file_access()->m_FileLen, 214 loader.max_already_available_bound()); 215 216 // Prevent access to non requested data to coerce the parser to send new 217 // request for non available (non requested before) data. 218 loader.set_is_new_data_available(false); 219 FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()); 220 221 // The main cross ref table should not be requested. 222 // (It is always at file end) 223 EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound()); 224 225 // Allow parse page. 226 loader.set_is_new_data_available(true); 227 ASSERT_EQ(PDF_DATA_AVAIL, 228 FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints())); 229 230 // The main cross ref table should not be processed. 231 // (It is always at file end) 232 EXPECT_GT(loader.file_access()->m_FileLen, 233 loader.max_already_available_bound()); 234 235 // Prevent loading data, while page loading. 236 loader.set_is_new_data_available(false); 237 FPDF_PAGE page = LoadPage(first_page_num); 238 EXPECT_TRUE(page); 239 UnloadPage(page); 240} 241