1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "public/fpdf_text.h"
8
9#include "core/include/fpdfdoc/fpdf_doc.h"
10#include "core/include/fpdftext/fpdf_text.h"
11#include "fpdfsdk/include/fsdk_define.h"
12
13#ifdef PDF_ENABLE_XFA
14#include "fpdfsdk/include/fpdfxfa/fpdfxfa_doc.h"
15#include "fpdfsdk/include/fpdfxfa/fpdfxfa_page.h"
16#endif  // PDF_ENABLE_XFA
17
18#ifdef _WIN32
19#include <tchar.h>
20#endif
21
22DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
23  CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
24  if (!pPDFPage)
25    return nullptr;
26#ifdef PDF_ENABLE_XFA
27  CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
28  CPDFXFA_Document* pDoc = pPage->GetDocument();
29  CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc());
30#else  // PDF_ENABLE_XFA
31  CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
32#endif  // PDF_ENABLE_XFA
33  IPDF_TextPage* textpage =
34      IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L());
35  textpage->ParseTextPage();
36  return textpage;
37}
38DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
39  delete (IPDF_TextPage*)text_page;
40}
41DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
42  if (!text_page)
43    return -1;
44  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
45  return textpage->CountChars();
46}
47
48DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
49                                                   int index) {
50  if (!text_page)
51    return -1;
52  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
53
54  if (index < 0 || index >= textpage->CountChars())
55    return 0;
56
57  FPDF_CHAR_INFO charinfo;
58  textpage->GetCharInfo(index, &charinfo);
59  return charinfo.m_Unicode;
60}
61
62DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
63                                              int index) {
64  if (!text_page)
65    return 0;
66  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
67
68  if (index < 0 || index >= textpage->CountChars())
69    return 0;
70
71  FPDF_CHAR_INFO charinfo;
72  textpage->GetCharInfo(index, &charinfo);
73  return charinfo.m_FontSize;
74}
75
76DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
77                                           int index,
78                                           double* left,
79                                           double* right,
80                                           double* bottom,
81                                           double* top) {
82  if (!text_page)
83    return;
84  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
85
86  if (index < 0 || index >= textpage->CountChars())
87    return;
88  FPDF_CHAR_INFO charinfo;
89  textpage->GetCharInfo(index, &charinfo);
90  *left = charinfo.m_CharBox.left;
91  *right = charinfo.m_CharBox.right;
92  *bottom = charinfo.m_CharBox.bottom;
93  *top = charinfo.m_CharBox.top;
94}
95
96// select
97DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
98                                                 double x,
99                                                 double y,
100                                                 double xTolerance,
101                                                 double yTolerance) {
102  if (!text_page)
103    return -3;
104  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
105  return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
106                                 (FX_FLOAT)yTolerance);
107}
108
109DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
110                                       int start,
111                                       int count,
112                                       unsigned short* result) {
113  if (!text_page)
114    return 0;
115  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
116
117  if (start >= textpage->CountChars())
118    return 0;
119
120  CFX_WideString str = textpage->GetPageText(start, count);
121  if (str.GetLength() > count)
122    str = str.Left(count);
123
124  CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
125  FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
126               cbUTF16str.GetLength());
127  cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
128
129  return cbUTF16str.GetLength() / sizeof(unsigned short);
130}
131
132DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
133                                          int start,
134                                          int count) {
135  if (!text_page)
136    return 0;
137  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
138  return textpage->CountRects(start, count);
139}
140DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
141                                        int rect_index,
142                                        double* left,
143                                        double* top,
144                                        double* right,
145                                        double* bottom) {
146  if (!text_page)
147    return;
148  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
149  CFX_FloatRect rect;
150  textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
151  *left = rect.left;
152  *top = rect.top;
153  *right = rect.right;
154  *bottom = rect.bottom;
155}
156
157DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
158                                              double left,
159                                              double top,
160                                              double right,
161                                              double bottom,
162                                              unsigned short* buffer,
163                                              int buflen) {
164  if (!text_page)
165    return 0;
166  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
167  CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
168                     (FX_FLOAT)top);
169  CFX_WideString str = textpage->GetTextByRect(rect);
170
171  if (buflen <= 0 || !buffer) {
172    return str.GetLength();
173  }
174
175  CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
176  int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
177  int size = buflen > len ? len : buflen;
178  FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
179               size * sizeof(unsigned short));
180  cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
181
182  return size;
183}
184
185// Search
186//-1 for end
187DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
188                                                    FPDF_WIDESTRING findwhat,
189                                                    unsigned long flags,
190                                                    int start_index) {
191  if (!text_page)
192    return NULL;
193  IPDF_TextPageFind* textpageFind = NULL;
194  textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
195  FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
196  textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
197                          start_index);
198  return textpageFind;
199}
200DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
201  if (!handle)
202    return FALSE;
203  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
204  return textpageFind->FindNext();
205}
206DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
207  if (!handle)
208    return FALSE;
209  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
210  return textpageFind->FindPrev();
211}
212DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
213  if (!handle)
214    return 0;
215  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
216  return textpageFind->GetCurOrder();
217}
218DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
219  if (!handle)
220    return 0;
221  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
222  return textpageFind->GetMatchedCount();
223}
224DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
225  if (!handle)
226    return;
227  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
228  delete textpageFind;
229  handle = NULL;
230}
231
232// web link
233DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
234  if (!text_page)
235    return NULL;
236  IPDF_LinkExtract* pageLink = NULL;
237  pageLink = IPDF_LinkExtract::CreateLinkExtract();
238  pageLink->ExtractLinks((IPDF_TextPage*)text_page);
239  return pageLink;
240}
241DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
242  if (!link_page)
243    return 0;
244  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
245  return pageLink->CountLinks();
246}
247DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
248                                      int link_index,
249                                      unsigned short* buffer,
250                                      int buflen) {
251  if (!link_page)
252    return 0;
253  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
254  CFX_WideString url = pageLink->GetURL(link_index);
255
256  CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
257  int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
258  if (!buffer || buflen <= 0)
259    return len;
260  int size = len < buflen ? len : buflen;
261  if (size > 0) {
262    FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
263                 size * sizeof(unsigned short));
264    cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
265  }
266  return size;
267}
268DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
269                                          int link_index) {
270  if (!link_page)
271    return 0;
272  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
273  CFX_RectArray rectArray;
274  pageLink->GetRects(link_index, rectArray);
275  return rectArray.GetSize();
276}
277DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
278                                        int link_index,
279                                        int rect_index,
280                                        double* left,
281                                        double* top,
282                                        double* right,
283                                        double* bottom) {
284  if (!link_page)
285    return;
286  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
287  CFX_RectArray rectArray;
288  pageLink->GetRects(link_index, rectArray);
289  if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
290    CFX_FloatRect rect = rectArray.GetAt(rect_index);
291    *left = rect.left;
292    *right = rect.right;
293    *top = rect.top;
294    *bottom = rect.bottom;
295  }
296}
297DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
298  delete (IPDF_LinkExtract*)link_page;
299}
300