1// Copyright 2015 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "core/include/fxcrt/fx_basic.h" 6#include "public/fpdf_text.h" 7#include "public/fpdfview.h" 8#include "testing/embedder_test.h" 9#include "testing/test_support.h" 10#include "testing/gtest/include/gtest/gtest.h" 11 12namespace { 13 14bool check_unsigned_shorts(const char* expected, 15 const unsigned short* actual, 16 size_t length) { 17 if (length > strlen(expected) + 1) { 18 return false; 19 } 20 for (size_t i = 0; i < length; ++i) { 21 if (actual[i] != static_cast<unsigned short>(expected[i])) { 22 return false; 23 } 24 } 25 return true; 26} 27 28} // namespace 29 30class FPDFTextEmbeddertest : public EmbedderTest {}; 31 32TEST_F(FPDFTextEmbeddertest, Text) { 33 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 34 FPDF_PAGE page = LoadPage(0); 35 EXPECT_NE(nullptr, page); 36 37 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 38 EXPECT_NE(nullptr, textpage); 39 40 static const char expected[] = "Hello, world!\r\nGoodbye, world!"; 41 unsigned short fixed_buffer[128]; 42 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 43 44 // Check includes the terminating NUL that is provided. 45 int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer); 46 ASSERT_GE(num_chars, 0); 47 EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars)); 48 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected))); 49 50 // Count does not include the terminating NUL in the string literal. 51 EXPECT_EQ(sizeof(expected) - 1, FPDFText_CountChars(textpage)); 52 for (size_t i = 0; i < sizeof(expected) - 1; ++i) { 53 EXPECT_EQ(static_cast<unsigned int>(expected[i]), 54 FPDFText_GetUnicode(textpage, i)) 55 << " at " << i; 56 } 57 58 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); 59 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); 60 61 double left = 0.0; 62 double right = 0.0; 63 double bottom = 0.0; 64 double top = 0.0; 65 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top); 66 EXPECT_NEAR(41.071, left, 0.001); 67 EXPECT_NEAR(46.243, right, 0.001); 68 EXPECT_NEAR(49.844, bottom, 0.001); 69 EXPECT_NEAR(55.520, top, 0.001); 70 71 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0)); 72 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0)); 73 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0)); 74 75 // Test out of range indicies. 76 EXPECT_EQ(-1, 77 FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0)); 78 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0)); 79 80 // Count does not include the terminating NUL in the string literal. 81 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1)); 82 83 left = 0.0; 84 right = 0.0; 85 bottom = 0.0; 86 top = 0.0; 87 FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom); 88 EXPECT_NEAR(20.847, left, 0.001); 89 EXPECT_NEAR(135.167, right, 0.001); 90 EXPECT_NEAR(96.655, bottom, 0.001); 91 EXPECT_NEAR(116.000, top, 0.001); 92 93 // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0). 94 left = -1.0; 95 right = -1.0; 96 bottom = -1.0; 97 top = -1.0; 98 FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom); 99 EXPECT_EQ(0.0, left); 100 EXPECT_EQ(0.0, right); 101 EXPECT_EQ(0.0, bottom); 102 EXPECT_EQ(0.0, top); 103 104 left = -2.0; 105 right = -2.0; 106 bottom = -2.0; 107 top = -2.0; 108 FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom); 109 EXPECT_EQ(0.0, left); 110 EXPECT_EQ(0.0, right); 111 EXPECT_EQ(0.0, bottom); 112 EXPECT_EQ(0.0, top); 113 114 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 0, 0)); 115 116 // Extract starting at character 4 as above. 117 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 118 EXPECT_EQ(1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 119 fixed_buffer, 1)); 120 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1)); 121 EXPECT_EQ(0xbdbd, fixed_buffer[1]); 122 123 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 124 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 125 fixed_buffer, 9)); 126 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); 127 EXPECT_EQ(0xbdbd, fixed_buffer[9]); 128 129 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 130 EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 131 fixed_buffer, 128)); 132 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); 133 EXPECT_EQ(0u, fixed_buffer[9]); 134 EXPECT_EQ(0xbdbd, fixed_buffer[10]); 135 136 FPDFText_ClosePage(textpage); 137 UnloadPage(page); 138} 139 140TEST_F(FPDFTextEmbeddertest, TextSearch) { 141 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 142 FPDF_PAGE page = LoadPage(0); 143 EXPECT_NE(nullptr, page); 144 145 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 146 EXPECT_NE(nullptr, textpage); 147 148 std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope = 149 GetFPDFWideString(L"nope"); 150 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world = 151 GetFPDFWideString(L"world"); 152 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps = 153 GetFPDFWideString(L"WORLD"); 154 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr = 155 GetFPDFWideString(L"orld"); 156 157 // No occurences of "nope" in test page. 158 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0); 159 EXPECT_NE(nullptr, search); 160 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 161 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 162 163 // Advancing finds nothing. 164 EXPECT_FALSE(FPDFText_FindNext(search)); 165 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 166 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 167 168 // Retreating finds nothing. 169 EXPECT_FALSE(FPDFText_FindPrev(search)); 170 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 171 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 172 FPDFText_FindClose(search); 173 174 // Two occurences of "world" in test page. 175 search = FPDFText_FindStart(textpage, world.get(), 0, 2); 176 EXPECT_NE(nullptr, search); 177 178 // Remains not found until advanced. 179 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 180 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 181 182 // First occurence of "world" in this test page. 183 EXPECT_TRUE(FPDFText_FindNext(search)); 184 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 185 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 186 187 // Last occurence of "world" in this test page. 188 EXPECT_TRUE(FPDFText_FindNext(search)); 189 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); 190 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 191 192 // Found position unchanged when fails to advance. 193 EXPECT_FALSE(FPDFText_FindNext(search)); 194 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); 195 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 196 197 // Back to first occurence. 198 EXPECT_TRUE(FPDFText_FindPrev(search)); 199 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 200 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 201 202 // Found position unchanged when fails to retreat. 203 EXPECT_FALSE(FPDFText_FindPrev(search)); 204 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 205 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 206 FPDFText_FindClose(search); 207 208 // Exact search unaffected by case sensitiity and whole word flags. 209 search = FPDFText_FindStart(textpage, world.get(), 210 FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0); 211 EXPECT_NE(nullptr, search); 212 EXPECT_TRUE(FPDFText_FindNext(search)); 213 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 214 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 215 FPDFText_FindClose(search); 216 217 // Default is case-insensitive, so matching agaist caps works. 218 search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0); 219 EXPECT_NE(nullptr, search); 220 EXPECT_TRUE(FPDFText_FindNext(search)); 221 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 222 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 223 FPDFText_FindClose(search); 224 225 // But can be made case sensitive, in which case this fails. 226 search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0); 227 EXPECT_FALSE(FPDFText_FindNext(search)); 228 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 229 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 230 FPDFText_FindClose(search); 231 232 // Default is match anywhere within word, so matching substirng works. 233 search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0); 234 EXPECT_TRUE(FPDFText_FindNext(search)); 235 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search)); 236 EXPECT_EQ(4, FPDFText_GetSchCount(search)); 237 FPDFText_FindClose(search); 238 239 // But can be made to mach word boundaries, in which case this fails. 240 search = 241 FPDFText_FindStart(textpage, world_substr.get(), FPDF_MATCHWHOLEWORD, 0); 242 EXPECT_FALSE(FPDFText_FindNext(search)); 243 // TODO(tsepez): investigate strange index/count values in this state. 244 FPDFText_FindClose(search); 245 246 FPDFText_ClosePage(textpage); 247 UnloadPage(page); 248} 249 250// Test that the page has characters despite a bad stream length. 251TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { 252 EXPECT_TRUE(OpenDocument("bug_57.pdf")); 253 FPDF_PAGE page = LoadPage(0); 254 EXPECT_NE(nullptr, page); 255 256 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 257 EXPECT_NE(nullptr, textpage); 258 EXPECT_EQ(13, FPDFText_CountChars(textpage)); 259 260 FPDFText_ClosePage(textpage); 261 UnloadPage(page); 262} 263 264TEST_F(FPDFTextEmbeddertest, WebLinks) { 265 EXPECT_TRUE(OpenDocument("weblinks.pdf")); 266 FPDF_PAGE page = LoadPage(0); 267 EXPECT_NE(nullptr, page); 268 269 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 270 EXPECT_NE(nullptr, textpage); 271 272 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); 273 EXPECT_NE(nullptr, pagelink); 274 275 // Page contains two HTTP-style URLs. 276 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); 277 278 // Only a terminating NUL required for bogus links. 279 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0)); 280 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0)); 281 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0)); 282 283 // Query the number of characters required for each link (incl NUL). 284 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0)); 285 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); 286 287 static const char expected_url[] = "http://example.com?q=foo"; 288 unsigned short fixed_buffer[128]; 289 290 // Retrieve a link with too small a buffer. Buffer will not be 291 // NUL-terminated, but must not be modified past indicated length, 292 // so pre-fill with a pattern to check write bounds. 293 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 294 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1)); 295 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1)); 296 EXPECT_EQ(0xbdbd, fixed_buffer[1]); 297 298 // Check buffer that doesn't have space for a terminating NUL. 299 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 300 EXPECT_EQ( 301 sizeof(expected_url) - 1, 302 FPDFLink_GetURL(pagelink, 0, fixed_buffer, sizeof(expected_url) - 1)); 303 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 304 sizeof(expected_url) - 1)); 305 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url) - 1]); 306 307 // Retreive link with exactly-sized buffer. 308 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 309 EXPECT_EQ(sizeof(expected_url), 310 FPDFLink_GetURL(pagelink, 0, fixed_buffer, sizeof(expected_url))); 311 EXPECT_TRUE( 312 check_unsigned_shorts(expected_url, fixed_buffer, sizeof(expected_url))); 313 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); 314 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); 315 316 // Retreive link with ample-sized-buffer. 317 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 318 EXPECT_EQ(sizeof(expected_url), 319 FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128)); 320 EXPECT_TRUE( 321 check_unsigned_shorts(expected_url, fixed_buffer, sizeof(expected_url))); 322 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); 323 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); 324 325 // Each link rendered in a single rect in this test page. 326 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0)); 327 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1)); 328 329 // Each link rendered in a single rect in this test page. 330 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1)); 331 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2)); 332 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000)); 333 334 // Check boundary of valid link index with valid rect index. 335 double left = 0.0; 336 double right = 0.0; 337 double top = 0.0; 338 double bottom = 0.0; 339 FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom); 340 EXPECT_NEAR(50.791, left, 0.001); 341 EXPECT_NEAR(187.963, right, 0.001); 342 EXPECT_NEAR(97.624, bottom, 0.001); 343 EXPECT_NEAR(108.736, top, 0.001); 344 345 // Check that valid link with invalid rect index leaves parameters unchanged. 346 left = -1.0; 347 right = -1.0; 348 top = -1.0; 349 bottom = -1.0; 350 FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom); 351 EXPECT_EQ(-1.0, left); 352 EXPECT_EQ(-1.0, right); 353 EXPECT_EQ(-1.0, bottom); 354 EXPECT_EQ(-1.0, top); 355 356 // Check that invalid link index leaves parameters unchanged. 357 left = -2.0; 358 right = -2.0; 359 top = -2.0; 360 bottom = -2.0; 361 FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom); 362 EXPECT_EQ(-2.0, left); 363 EXPECT_EQ(-2.0, right); 364 EXPECT_EQ(-2.0, bottom); 365 EXPECT_EQ(-2.0, top); 366 367 FPDFLink_CloseWebLinks(pagelink); 368 FPDFText_ClosePage(textpage); 369 UnloadPage(page); 370} 371 372TEST_F(FPDFTextEmbeddertest, GetFontSize) { 373 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 374 FPDF_PAGE page = LoadPage(0); 375 EXPECT_NE(nullptr, page); 376 377 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 378 EXPECT_NE(nullptr, textpage); 379 380 const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 381 12, 12, 12, 1, 1, 16, 16, 16, 16, 16, 382 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; 383 384 int count = FPDFText_CountChars(textpage); 385 ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), count); 386 for (int i = 0; i < count; ++i) 387 EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i; 388 389 FPDFText_ClosePage(textpage); 390 UnloadPage(page); 391} 392