fpdftext.cpp revision 5ae9d0c6fd838a2967cca72aa5751b51dadc2769
1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#include "public/fpdf_text.h" 8 9#include <algorithm> 10#include <vector> 11 12#include "core/fpdfapi/page/cpdf_page.h" 13#include "core/fpdfdoc/cpdf_viewerpreferences.h" 14#include "core/fpdftext/cpdf_linkextract.h" 15#include "core/fpdftext/cpdf_textpage.h" 16#include "core/fpdftext/cpdf_textpagefind.h" 17#include "fpdfsdk/fsdk_define.h" 18#include "third_party/base/numerics/safe_conversions.h" 19#include "third_party/base/stl_util.h" 20 21#ifdef PDF_ENABLE_XFA 22#include "fpdfsdk/fpdfxfa/cpdfxfa_context.h" 23#include "fpdfsdk/fpdfxfa/cpdfxfa_page.h" 24#endif // PDF_ENABLE_XFA 25 26#ifdef _WIN32 27#include <tchar.h> 28#endif 29 30namespace { 31 32CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) { 33 return static_cast<CPDF_TextPage*>(text_page); 34} 35 36CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) { 37 return static_cast<CPDF_TextPageFind*>(handle); 38} 39 40CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) { 41 return static_cast<CPDF_LinkExtract*>(link); 42} 43 44} // namespace 45 46DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) { 47 CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page); 48 if (!pPDFPage) 49 return nullptr; 50 51#ifdef PDF_ENABLE_XFA 52 CPDFXFA_Page* pPage = (CPDFXFA_Page*)page; 53 CPDFXFA_Context* pContext = pPage->GetContext(); 54 CPDF_ViewerPreferences viewRef(pContext->GetPDFDoc()); 55#else // PDF_ENABLE_XFA 56 CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument); 57#endif // PDF_ENABLE_XFA 58 59 CPDF_TextPage* textpage = new CPDF_TextPage( 60 pPDFPage, viewRef.IsDirectionR2L() ? FPDFText_Direction::Right 61 : FPDFText_Direction::Left); 62 textpage->ParseTextPage(); 63 return textpage; 64} 65 66DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) { 67 delete CPDFTextPageFromFPDFTextPage(text_page); 68} 69 70DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) { 71 if (!text_page) 72 return -1; 73 74 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 75 return textpage->CountChars(); 76} 77 78DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, 79 int index) { 80 if (!text_page) 81 return 0; 82 83 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 84 if (index < 0 || index >= textpage->CountChars()) 85 return 0; 86 87 FPDF_CHAR_INFO charinfo; 88 textpage->GetCharInfo(index, &charinfo); 89 return charinfo.m_Unicode; 90} 91 92DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, 93 int index) { 94 if (!text_page) 95 return 0; 96 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 97 98 if (index < 0 || index >= textpage->CountChars()) 99 return 0; 100 101 FPDF_CHAR_INFO charinfo; 102 textpage->GetCharInfo(index, &charinfo); 103 return charinfo.m_FontSize; 104} 105 106DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, 107 int index, 108 double* left, 109 double* right, 110 double* bottom, 111 double* top) { 112 if (!text_page) 113 return; 114 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 115 116 if (index < 0 || index >= textpage->CountChars()) 117 return; 118 FPDF_CHAR_INFO charinfo; 119 textpage->GetCharInfo(index, &charinfo); 120 *left = charinfo.m_CharBox.left; 121 *right = charinfo.m_CharBox.right; 122 *bottom = charinfo.m_CharBox.bottom; 123 *top = charinfo.m_CharBox.top; 124} 125 126// select 127DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, 128 double x, 129 double y, 130 double xTolerance, 131 double yTolerance) { 132 if (!text_page) 133 return -3; 134 135 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 136 return textpage->GetIndexAtPos( 137 CFX_PointF(static_cast<FX_FLOAT>(x), static_cast<FX_FLOAT>(y)), 138 CFX_SizeF(static_cast<FX_FLOAT>(xTolerance), 139 static_cast<FX_FLOAT>(yTolerance))); 140} 141 142DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, 143 int start, 144 int count, 145 unsigned short* result) { 146 if (!text_page) 147 return 0; 148 149 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 150 if (start >= textpage->CountChars()) 151 return 0; 152 153 CFX_WideString str = textpage->GetPageText(start, count); 154 if (str.GetLength() > count) 155 str = str.Left(count); 156 157 CFX_ByteString cbUTF16str = str.UTF16LE_Encode(); 158 FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()), 159 cbUTF16str.GetLength()); 160 cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength()); 161 162 return cbUTF16str.GetLength() / sizeof(unsigned short); 163} 164 165DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, 166 int start, 167 int count) { 168 if (!text_page) 169 return 0; 170 171 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 172 return textpage->CountRects(start, count); 173} 174 175DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, 176 int rect_index, 177 double* left, 178 double* top, 179 double* right, 180 double* bottom) { 181 if (!text_page) 182 return; 183 184 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 185 CFX_FloatRect rect; 186 textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom); 187 *left = rect.left; 188 *top = rect.top; 189 *right = rect.right; 190 *bottom = rect.bottom; 191} 192 193DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page, 194 double left, 195 double top, 196 double right, 197 double bottom, 198 unsigned short* buffer, 199 int buflen) { 200 if (!text_page) 201 return 0; 202 203 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 204 CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right, 205 (FX_FLOAT)top); 206 CFX_WideString str = textpage->GetTextByRect(rect); 207 208 if (buflen <= 0 || !buffer) 209 return str.GetLength(); 210 211 CFX_ByteString cbUTF16Str = str.UTF16LE_Encode(); 212 int len = cbUTF16Str.GetLength() / sizeof(unsigned short); 213 int size = buflen > len ? len : buflen; 214 FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)), 215 size * sizeof(unsigned short)); 216 cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short)); 217 218 return size; 219} 220 221// Search 222// -1 for end 223DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, 224 FPDF_WIDESTRING findwhat, 225 unsigned long flags, 226 int start_index) { 227 if (!text_page) 228 return nullptr; 229 230 CPDF_TextPageFind* textpageFind = 231 new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page)); 232 FX_STRSIZE len = CFX_WideString::WStringLength(findwhat); 233 textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags, 234 start_index); 235 return textpageFind; 236} 237 238DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) { 239 if (!handle) 240 return false; 241 242 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 243 return textpageFind->FindNext(); 244} 245 246DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) { 247 if (!handle) 248 return false; 249 250 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 251 return textpageFind->FindPrev(); 252} 253 254DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) { 255 if (!handle) 256 return 0; 257 258 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 259 return textpageFind->GetCurOrder(); 260} 261 262DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) { 263 if (!handle) 264 return 0; 265 266 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 267 return textpageFind->GetMatchedCount(); 268} 269 270DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) { 271 if (!handle) 272 return; 273 274 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 275 delete textpageFind; 276 handle = nullptr; 277} 278 279// web link 280DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) { 281 if (!text_page) 282 return nullptr; 283 284 CPDF_LinkExtract* pageLink = 285 new CPDF_LinkExtract(CPDFTextPageFromFPDFTextPage(text_page)); 286 pageLink->ExtractLinks(); 287 return pageLink; 288} 289 290DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) { 291 if (!link_page) 292 return 0; 293 294 CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); 295 return pdfium::base::checked_cast<int>(pageLink->CountLinks()); 296} 297 298DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, 299 int link_index, 300 unsigned short* buffer, 301 int buflen) { 302 CFX_WideString wsUrl(L""); 303 if (link_page && link_index >= 0) { 304 CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); 305 wsUrl = pageLink->GetURL(link_index); 306 } 307 CFX_ByteString cbUTF16URL = wsUrl.UTF16LE_Encode(); 308 int required = cbUTF16URL.GetLength() / sizeof(unsigned short); 309 if (!buffer || buflen <= 0) 310 return required; 311 312 int size = std::min(required, buflen); 313 if (size > 0) { 314 int buf_size = size * sizeof(unsigned short); 315 FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(buf_size), buf_size); 316 } 317 return size; 318} 319 320DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, 321 int link_index) { 322 if (!link_page || link_index < 0) 323 return 0; 324 325 CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); 326 return pdfium::CollectionSize<int>(pageLink->GetRects(link_index)); 327} 328 329DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, 330 int link_index, 331 int rect_index, 332 double* left, 333 double* top, 334 double* right, 335 double* bottom) { 336 if (!link_page || link_index < 0 || rect_index < 0) 337 return; 338 339 CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); 340 std::vector<CFX_FloatRect> rectArray = pageLink->GetRects(link_index); 341 if (rect_index >= pdfium::CollectionSize<int>(rectArray)) 342 return; 343 344 *left = rectArray[rect_index].left; 345 *right = rectArray[rect_index].right; 346 *top = rectArray[rect_index].top; 347 *bottom = rectArray[rect_index].bottom; 348} 349 350DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) { 351 delete CPDFLinkExtractFromFPDFPageLink(link_page); 352} 353