1// Copyright 2015 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "core/include/fxcrt/fx_basic.h"
6#include "public/fpdf_text.h"
7#include "public/fpdfview.h"
8#include "testing/embedder_test.h"
9#include "testing/test_support.h"
10#include "testing/gtest/include/gtest/gtest.h"
11
namespace {

// Compares the first |length| 16-bit code units of |actual| against the
// single-byte characters of the NUL-terminated string |expected|.
//
// |length| may reach one position past the visible characters of |expected|
// so that a caller can also verify the terminating NUL. Any larger |length|
// is rejected up front, which keeps the loop from reading beyond |expected|.
//
// Returns true when every compared position matches, false otherwise.
bool check_unsigned_shorts(const char* expected,
                           const unsigned short* actual,
                           size_t length) {
  if (length > strlen(expected) + 1) {
    return false;
  }
  for (size_t i = 0; i < length; ++i) {
    // Widen through unsigned char: where plain char is signed, a byte >= 0x80
    // would otherwise sign-extend to 0xFFxx and never equal the code unit.
    const unsigned short want =
        static_cast<unsigned short>(static_cast<unsigned char>(expected[i]));
    if (actual[i] != want) {
      return false;
    }
  }
  return true;
}

}  // namespace
29
30class FPDFTextEmbeddertest : public EmbedderTest {};
31
32TEST_F(FPDFTextEmbeddertest, Text) {
33  EXPECT_TRUE(OpenDocument("hello_world.pdf"));
34  FPDF_PAGE page = LoadPage(0);
35  EXPECT_NE(nullptr, page);
36
37  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
38  EXPECT_NE(nullptr, textpage);
39
40  static const char expected[] = "Hello, world!\r\nGoodbye, world!";
41  unsigned short fixed_buffer[128];
42  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
43
44  // Check includes the terminating NUL that is provided.
45  int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer);
46  ASSERT_GE(num_chars, 0);
47  EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars));
48  EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected)));
49
50  // Count does not include the terminating NUL in the string literal.
51  EXPECT_EQ(sizeof(expected) - 1, FPDFText_CountChars(textpage));
52  for (size_t i = 0; i < sizeof(expected) - 1; ++i) {
53    EXPECT_EQ(static_cast<unsigned int>(expected[i]),
54              FPDFText_GetUnicode(textpage, i))
55        << " at " << i;
56  }
57
58  EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
59  EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));
60
61  double left = 0.0;
62  double right = 0.0;
63  double bottom = 0.0;
64  double top = 0.0;
65  FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top);
66  EXPECT_NEAR(41.071, left, 0.001);
67  EXPECT_NEAR(46.243, right, 0.001);
68  EXPECT_NEAR(49.844, bottom, 0.001);
69  EXPECT_NEAR(55.520, top, 0.001);
70
71  EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0));
72  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0));
73  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0));
74
75  // Test out of range indicies.
76  EXPECT_EQ(-1,
77            FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0));
78  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0));
79
80  // Count does not include the terminating NUL in the string literal.
81  EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1));
82
83  left = 0.0;
84  right = 0.0;
85  bottom = 0.0;
86  top = 0.0;
87  FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom);
88  EXPECT_NEAR(20.847, left, 0.001);
89  EXPECT_NEAR(135.167, right, 0.001);
90  EXPECT_NEAR(96.655, bottom, 0.001);
91  EXPECT_NEAR(116.000, top, 0.001);
92
93  // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0).
94  left = -1.0;
95  right = -1.0;
96  bottom = -1.0;
97  top = -1.0;
98  FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom);
99  EXPECT_EQ(0.0, left);
100  EXPECT_EQ(0.0, right);
101  EXPECT_EQ(0.0, bottom);
102  EXPECT_EQ(0.0, top);
103
104  left = -2.0;
105  right = -2.0;
106  bottom = -2.0;
107  top = -2.0;
108  FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom);
109  EXPECT_EQ(0.0, left);
110  EXPECT_EQ(0.0, right);
111  EXPECT_EQ(0.0, bottom);
112  EXPECT_EQ(0.0, top);
113
114  EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 0, 0));
115
116  // Extract starting at character 4 as above.
117  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
118  EXPECT_EQ(1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
119                                       fixed_buffer, 1));
120  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1));
121  EXPECT_EQ(0xbdbd, fixed_buffer[1]);
122
123  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
124  EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
125                                       fixed_buffer, 9));
126  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
127  EXPECT_EQ(0xbdbd, fixed_buffer[9]);
128
129  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
130  EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
131                                        fixed_buffer, 128));
132  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
133  EXPECT_EQ(0u, fixed_buffer[9]);
134  EXPECT_EQ(0xbdbd, fixed_buffer[10]);
135
136  FPDFText_ClosePage(textpage);
137  UnloadPage(page);
138}
139
140TEST_F(FPDFTextEmbeddertest, TextSearch) {
141  EXPECT_TRUE(OpenDocument("hello_world.pdf"));
142  FPDF_PAGE page = LoadPage(0);
143  EXPECT_NE(nullptr, page);
144
145  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
146  EXPECT_NE(nullptr, textpage);
147
148  std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope =
149      GetFPDFWideString(L"nope");
150  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world =
151      GetFPDFWideString(L"world");
152  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps =
153      GetFPDFWideString(L"WORLD");
154  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr =
155      GetFPDFWideString(L"orld");
156
157  // No occurences of "nope" in test page.
158  FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0);
159  EXPECT_NE(nullptr, search);
160  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
161  EXPECT_EQ(0, FPDFText_GetSchCount(search));
162
163  // Advancing finds nothing.
164  EXPECT_FALSE(FPDFText_FindNext(search));
165  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
166  EXPECT_EQ(0, FPDFText_GetSchCount(search));
167
168  // Retreating finds nothing.
169  EXPECT_FALSE(FPDFText_FindPrev(search));
170  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
171  EXPECT_EQ(0, FPDFText_GetSchCount(search));
172  FPDFText_FindClose(search);
173
174  // Two occurences of "world" in test page.
175  search = FPDFText_FindStart(textpage, world.get(), 0, 2);
176  EXPECT_NE(nullptr, search);
177
178  // Remains not found until advanced.
179  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
180  EXPECT_EQ(0, FPDFText_GetSchCount(search));
181
182  // First occurence of "world" in this test page.
183  EXPECT_TRUE(FPDFText_FindNext(search));
184  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
185  EXPECT_EQ(5, FPDFText_GetSchCount(search));
186
187  // Last occurence of "world" in this test page.
188  EXPECT_TRUE(FPDFText_FindNext(search));
189  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
190  EXPECT_EQ(5, FPDFText_GetSchCount(search));
191
192  // Found position unchanged when fails to advance.
193  EXPECT_FALSE(FPDFText_FindNext(search));
194  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
195  EXPECT_EQ(5, FPDFText_GetSchCount(search));
196
197  // Back to first occurence.
198  EXPECT_TRUE(FPDFText_FindPrev(search));
199  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
200  EXPECT_EQ(5, FPDFText_GetSchCount(search));
201
202  // Found position unchanged when fails to retreat.
203  EXPECT_FALSE(FPDFText_FindPrev(search));
204  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
205  EXPECT_EQ(5, FPDFText_GetSchCount(search));
206  FPDFText_FindClose(search);
207
208  // Exact search unaffected by case sensitiity and whole word flags.
209  search = FPDFText_FindStart(textpage, world.get(),
210                              FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0);
211  EXPECT_NE(nullptr, search);
212  EXPECT_TRUE(FPDFText_FindNext(search));
213  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
214  EXPECT_EQ(5, FPDFText_GetSchCount(search));
215  FPDFText_FindClose(search);
216
217  // Default is case-insensitive, so matching agaist caps works.
218  search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0);
219  EXPECT_NE(nullptr, search);
220  EXPECT_TRUE(FPDFText_FindNext(search));
221  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
222  EXPECT_EQ(5, FPDFText_GetSchCount(search));
223  FPDFText_FindClose(search);
224
225  // But can be made case sensitive, in which case this fails.
226  search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0);
227  EXPECT_FALSE(FPDFText_FindNext(search));
228  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
229  EXPECT_EQ(0, FPDFText_GetSchCount(search));
230  FPDFText_FindClose(search);
231
232  // Default is match anywhere within word, so matching substirng works.
233  search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0);
234  EXPECT_TRUE(FPDFText_FindNext(search));
235  EXPECT_EQ(8, FPDFText_GetSchResultIndex(search));
236  EXPECT_EQ(4, FPDFText_GetSchCount(search));
237  FPDFText_FindClose(search);
238
239  // But can be made to mach word boundaries, in which case this fails.
240  search =
241      FPDFText_FindStart(textpage, world_substr.get(), FPDF_MATCHWHOLEWORD, 0);
242  EXPECT_FALSE(FPDFText_FindNext(search));
243  // TODO(tsepez): investigate strange index/count values in this state.
244  FPDFText_FindClose(search);
245
246  FPDFText_ClosePage(textpage);
247  UnloadPage(page);
248}
249
250// Test that the page has characters despite a bad stream length.
251TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) {
252  EXPECT_TRUE(OpenDocument("bug_57.pdf"));
253  FPDF_PAGE page = LoadPage(0);
254  EXPECT_NE(nullptr, page);
255
256  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
257  EXPECT_NE(nullptr, textpage);
258  EXPECT_EQ(13, FPDFText_CountChars(textpage));
259
260  FPDFText_ClosePage(textpage);
261  UnloadPage(page);
262}
263
264TEST_F(FPDFTextEmbeddertest, WebLinks) {
265  EXPECT_TRUE(OpenDocument("weblinks.pdf"));
266  FPDF_PAGE page = LoadPage(0);
267  EXPECT_NE(nullptr, page);
268
269  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
270  EXPECT_NE(nullptr, textpage);
271
272  FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
273  EXPECT_NE(nullptr, pagelink);
274
275  // Page contains two HTTP-style URLs.
276  EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink));
277
278  // Only a terminating NUL required for bogus links.
279  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0));
280  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0));
281  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0));
282
283  // Query the number of characters required for each link (incl NUL).
284  EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0));
285  EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
286
287  static const char expected_url[] = "http://example.com?q=foo";
288  unsigned short fixed_buffer[128];
289
290  // Retrieve a link with too small a buffer.  Buffer will not be
291  // NUL-terminated, but must not be modified past indicated length,
292  // so pre-fill with a pattern to check write bounds.
293  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
294  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1));
295  EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1));
296  EXPECT_EQ(0xbdbd, fixed_buffer[1]);
297
298  // Check buffer that doesn't have space for a terminating NUL.
299  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
300  EXPECT_EQ(
301      sizeof(expected_url) - 1,
302      FPDFLink_GetURL(pagelink, 0, fixed_buffer, sizeof(expected_url) - 1));
303  EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer,
304                                    sizeof(expected_url) - 1));
305  EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url) - 1]);
306
307  // Retreive link with exactly-sized buffer.
308  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
309  EXPECT_EQ(sizeof(expected_url),
310            FPDFLink_GetURL(pagelink, 0, fixed_buffer, sizeof(expected_url)));
311  EXPECT_TRUE(
312      check_unsigned_shorts(expected_url, fixed_buffer, sizeof(expected_url)));
313  EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]);
314  EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]);
315
316  // Retreive link with ample-sized-buffer.
317  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
318  EXPECT_EQ(sizeof(expected_url),
319            FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128));
320  EXPECT_TRUE(
321      check_unsigned_shorts(expected_url, fixed_buffer, sizeof(expected_url)));
322  EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]);
323  EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]);
324
325  // Each link rendered in a single rect in this test page.
326  EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0));
327  EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1));
328
329  // Each link rendered in a single rect in this test page.
330  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1));
331  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2));
332  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000));
333
334  // Check boundary of valid link index with valid rect index.
335  double left = 0.0;
336  double right = 0.0;
337  double top = 0.0;
338  double bottom = 0.0;
339  FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom);
340  EXPECT_NEAR(50.791, left, 0.001);
341  EXPECT_NEAR(187.963, right, 0.001);
342  EXPECT_NEAR(97.624, bottom, 0.001);
343  EXPECT_NEAR(108.736, top, 0.001);
344
345  // Check that valid link with invalid rect index leaves parameters unchanged.
346  left = -1.0;
347  right = -1.0;
348  top = -1.0;
349  bottom = -1.0;
350  FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom);
351  EXPECT_EQ(-1.0, left);
352  EXPECT_EQ(-1.0, right);
353  EXPECT_EQ(-1.0, bottom);
354  EXPECT_EQ(-1.0, top);
355
356  // Check that invalid link index leaves parameters unchanged.
357  left = -2.0;
358  right = -2.0;
359  top = -2.0;
360  bottom = -2.0;
361  FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom);
362  EXPECT_EQ(-2.0, left);
363  EXPECT_EQ(-2.0, right);
364  EXPECT_EQ(-2.0, bottom);
365  EXPECT_EQ(-2.0, top);
366
367  FPDFLink_CloseWebLinks(pagelink);
368  FPDFText_ClosePage(textpage);
369  UnloadPage(page);
370}
371
372TEST_F(FPDFTextEmbeddertest, GetFontSize) {
373  EXPECT_TRUE(OpenDocument("hello_world.pdf"));
374  FPDF_PAGE page = LoadPage(0);
375  EXPECT_NE(nullptr, page);
376
377  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
378  EXPECT_NE(nullptr, textpage);
379
380  const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
381                                        12, 12, 12, 1,  1,  16, 16, 16, 16, 16,
382                                        16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
383
384  int count = FPDFText_CountChars(textpage);
385  ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), count);
386  for (int i = 0; i < count; ++i)
387    EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i;
388
389  FPDFText_ClosePage(textpage);
390  UnloadPage(page);
391}
392