fpdftext.cpp revision 5ae9d0c6fd838a2967cca72aa5751b51dadc2769
1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "public/fpdf_text.h"
8
9#include <algorithm>
10#include <vector>
11
12#include "core/fpdfapi/page/cpdf_page.h"
13#include "core/fpdfdoc/cpdf_viewerpreferences.h"
14#include "core/fpdftext/cpdf_linkextract.h"
15#include "core/fpdftext/cpdf_textpage.h"
16#include "core/fpdftext/cpdf_textpagefind.h"
17#include "fpdfsdk/fsdk_define.h"
18#include "third_party/base/numerics/safe_conversions.h"
19#include "third_party/base/stl_util.h"
20
21#ifdef PDF_ENABLE_XFA
22#include "fpdfsdk/fpdfxfa/cpdfxfa_context.h"
23#include "fpdfsdk/fpdfxfa/cpdfxfa_page.h"
24#endif  // PDF_ENABLE_XFA
25
26#ifdef _WIN32
27#include <tchar.h>
28#endif
29
30namespace {
31
32CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) {
33  return static_cast<CPDF_TextPage*>(text_page);
34}
35
36CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) {
37  return static_cast<CPDF_TextPageFind*>(handle);
38}
39
40CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) {
41  return static_cast<CPDF_LinkExtract*>(link);
42}
43
44}  // namespace
45
46DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
47  CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
48  if (!pPDFPage)
49    return nullptr;
50
51#ifdef PDF_ENABLE_XFA
52  CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
53  CPDFXFA_Context* pContext = pPage->GetContext();
54  CPDF_ViewerPreferences viewRef(pContext->GetPDFDoc());
55#else  // PDF_ENABLE_XFA
56  CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
57#endif  // PDF_ENABLE_XFA
58
59  CPDF_TextPage* textpage = new CPDF_TextPage(
60      pPDFPage, viewRef.IsDirectionR2L() ? FPDFText_Direction::Right
61                                         : FPDFText_Direction::Left);
62  textpage->ParseTextPage();
63  return textpage;
64}
65
66DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
67  delete CPDFTextPageFromFPDFTextPage(text_page);
68}
69
70DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
71  if (!text_page)
72    return -1;
73
74  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
75  return textpage->CountChars();
76}
77
78DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
79                                                   int index) {
80  if (!text_page)
81    return 0;
82
83  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
84  if (index < 0 || index >= textpage->CountChars())
85    return 0;
86
87  FPDF_CHAR_INFO charinfo;
88  textpage->GetCharInfo(index, &charinfo);
89  return charinfo.m_Unicode;
90}
91
92DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
93                                              int index) {
94  if (!text_page)
95    return 0;
96  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
97
98  if (index < 0 || index >= textpage->CountChars())
99    return 0;
100
101  FPDF_CHAR_INFO charinfo;
102  textpage->GetCharInfo(index, &charinfo);
103  return charinfo.m_FontSize;
104}
105
106DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
107                                           int index,
108                                           double* left,
109                                           double* right,
110                                           double* bottom,
111                                           double* top) {
112  if (!text_page)
113    return;
114  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
115
116  if (index < 0 || index >= textpage->CountChars())
117    return;
118  FPDF_CHAR_INFO charinfo;
119  textpage->GetCharInfo(index, &charinfo);
120  *left = charinfo.m_CharBox.left;
121  *right = charinfo.m_CharBox.right;
122  *bottom = charinfo.m_CharBox.bottom;
123  *top = charinfo.m_CharBox.top;
124}
125
126// select
127DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
128                                                 double x,
129                                                 double y,
130                                                 double xTolerance,
131                                                 double yTolerance) {
132  if (!text_page)
133    return -3;
134
135  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
136  return textpage->GetIndexAtPos(
137      CFX_PointF(static_cast<FX_FLOAT>(x), static_cast<FX_FLOAT>(y)),
138      CFX_SizeF(static_cast<FX_FLOAT>(xTolerance),
139                static_cast<FX_FLOAT>(yTolerance)));
140}
141
142DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
143                                       int start,
144                                       int count,
145                                       unsigned short* result) {
146  if (!text_page)
147    return 0;
148
149  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
150  if (start >= textpage->CountChars())
151    return 0;
152
153  CFX_WideString str = textpage->GetPageText(start, count);
154  if (str.GetLength() > count)
155    str = str.Left(count);
156
157  CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
158  FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
159               cbUTF16str.GetLength());
160  cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
161
162  return cbUTF16str.GetLength() / sizeof(unsigned short);
163}
164
165DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
166                                          int start,
167                                          int count) {
168  if (!text_page)
169    return 0;
170
171  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
172  return textpage->CountRects(start, count);
173}
174
175DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
176                                        int rect_index,
177                                        double* left,
178                                        double* top,
179                                        double* right,
180                                        double* bottom) {
181  if (!text_page)
182    return;
183
184  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
185  CFX_FloatRect rect;
186  textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
187  *left = rect.left;
188  *top = rect.top;
189  *right = rect.right;
190  *bottom = rect.bottom;
191}
192
193DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
194                                              double left,
195                                              double top,
196                                              double right,
197                                              double bottom,
198                                              unsigned short* buffer,
199                                              int buflen) {
200  if (!text_page)
201    return 0;
202
203  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
204  CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
205                     (FX_FLOAT)top);
206  CFX_WideString str = textpage->GetTextByRect(rect);
207
208  if (buflen <= 0 || !buffer)
209    return str.GetLength();
210
211  CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
212  int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
213  int size = buflen > len ? len : buflen;
214  FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
215               size * sizeof(unsigned short));
216  cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
217
218  return size;
219}
220
221// Search
222// -1 for end
223DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
224                                                    FPDF_WIDESTRING findwhat,
225                                                    unsigned long flags,
226                                                    int start_index) {
227  if (!text_page)
228    return nullptr;
229
230  CPDF_TextPageFind* textpageFind =
231      new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page));
232  FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
233  textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
234                          start_index);
235  return textpageFind;
236}
237
238DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
239  if (!handle)
240    return false;
241
242  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
243  return textpageFind->FindNext();
244}
245
246DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
247  if (!handle)
248    return false;
249
250  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
251  return textpageFind->FindPrev();
252}
253
254DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
255  if (!handle)
256    return 0;
257
258  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
259  return textpageFind->GetCurOrder();
260}
261
262DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
263  if (!handle)
264    return 0;
265
266  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
267  return textpageFind->GetMatchedCount();
268}
269
270DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
271  if (!handle)
272    return;
273
274  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
275  delete textpageFind;
276  handle = nullptr;
277}
278
279// web link
280DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
281  if (!text_page)
282    return nullptr;
283
284  CPDF_LinkExtract* pageLink =
285      new CPDF_LinkExtract(CPDFTextPageFromFPDFTextPage(text_page));
286  pageLink->ExtractLinks();
287  return pageLink;
288}
289
290DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
291  if (!link_page)
292    return 0;
293
294  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
295  return pdfium::base::checked_cast<int>(pageLink->CountLinks());
296}
297
298DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
299                                      int link_index,
300                                      unsigned short* buffer,
301                                      int buflen) {
302  CFX_WideString wsUrl(L"");
303  if (link_page && link_index >= 0) {
304    CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
305    wsUrl = pageLink->GetURL(link_index);
306  }
307  CFX_ByteString cbUTF16URL = wsUrl.UTF16LE_Encode();
308  int required = cbUTF16URL.GetLength() / sizeof(unsigned short);
309  if (!buffer || buflen <= 0)
310    return required;
311
312  int size = std::min(required, buflen);
313  if (size > 0) {
314    int buf_size = size * sizeof(unsigned short);
315    FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(buf_size), buf_size);
316  }
317  return size;
318}
319
320DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
321                                          int link_index) {
322  if (!link_page || link_index < 0)
323    return 0;
324
325  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
326  return pdfium::CollectionSize<int>(pageLink->GetRects(link_index));
327}
328
329DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
330                                        int link_index,
331                                        int rect_index,
332                                        double* left,
333                                        double* top,
334                                        double* right,
335                                        double* bottom) {
336  if (!link_page || link_index < 0 || rect_index < 0)
337    return;
338
339  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
340  std::vector<CFX_FloatRect> rectArray = pageLink->GetRects(link_index);
341  if (rect_index >= pdfium::CollectionSize<int>(rectArray))
342    return;
343
344  *left = rectArray[rect_index].left;
345  *right = rectArray[rect_index].right;
346  *top = rectArray[rect_index].top;
347  *bottom = rectArray[rect_index].bottom;
348}
349
350DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
351  delete CPDFLinkExtractFromFPDFPageLink(link_page);
352}
353