// fpdf_text.h revision ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4
1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#ifndef PUBLIC_FPDF_TEXT_H_ 8#define PUBLIC_FPDF_TEXT_H_ 9 10#include "fpdfview.h" 11 12// Exported Functions 13#ifdef __cplusplus 14extern "C" { 15#endif 16 17// Function: FPDFText_LoadPage 18// Prepare information about all characters in a page. 19// Parameters: 20// page - Handle to the page. Returned by FPDF_LoadPage function 21// (in FPDFVIEW module). 22// Return value: 23// A handle to the text page information structure. 24// NULL if something goes wrong. 25// Comments: 26// Application must call FPDFText_ClosePage to release the text page 27// information. 28// 29DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); 30 31// Function: FPDFText_ClosePage 32// Release all resources allocated for a text page information 33// structure. 34// Parameters: 35// text_page - Handle to a text page information structure. 36// Returned by FPDFText_LoadPage function. 37// Return Value: 38// None. 39// 40DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); 41 42// Function: FPDFText_CountChars 43// Get number of characters in a page. 44// Parameters: 45// text_page - Handle to a text page information structure. 46// Returned by FPDFText_LoadPage function. 47// Return value: 48// Number of characters in the page. Return -1 for error. 49// Generated characters, like additional space characters, new line 50// characters, are also counted. 51// Comments: 52// Characters in a page form a "stream", inside the stream, each 53// character has an index. 54// We will use the index parameters in many of FPDFTEXT functions. The 55// first character in the page 56// has an index value of zero. 
57// 58DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); 59 60// Function: FPDFText_GetUnicode 61// Get Unicode of a character in a page. 62// Parameters: 63// text_page - Handle to a text page information structure. 64// Returned by FPDFText_LoadPage function. 65// index - Zero-based index of the character. 66// Return value: 67// The Unicode of the particular character. 68// If a character is not encoded in Unicode and Foxit engine can't 69// convert to Unicode, 70// the return value will be zero. 71// 72DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, 73 int index); 74 75// Function: FPDFText_GetFontSize 76// Get the font size of a particular character. 77// Parameters: 78// text_page - Handle to a text page information structure. 79// Returned by FPDFText_LoadPage function. 80// index - Zero-based index of the character. 81// Return value: 82// The font size of the particular character, measured in points (about 83// 1/72 inch). 84// This is the typographic size of the font (so called "em size"). 85// 86DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, 87 int index); 88 89// Function: FPDFText_GetCharBox 90// Get bounding box of a particular character. 91// Parameters: 92// text_page - Handle to a text page information structure. 93// Returned by FPDFText_LoadPage function. 94// index - Zero-based index of the character. 95// left - Pointer to a double number receiving left position 96// of the character box. 97// right - Pointer to a double number receiving right position 98// of the character box. 99// bottom - Pointer to a double number receiving bottom position 100// of the character box. 101// top - Pointer to a double number receiving top position of 102// the character box. 103// Return Value: 104// None. 105// Comments: 106// All positions are measured in PDF "user space". 
107// 108DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, 109 int index, 110 double* left, 111 double* right, 112 double* bottom, 113 double* top); 114 115// Function: FPDFText_GetCharIndexAtPos 116// Get the index of a character at or nearby a certain position on the 117// page. 118// Parameters: 119// text_page - Handle to a text page information structure. 120// Returned by FPDFText_LoadPage function. 121// x - X position in PDF "user space". 122// y - Y position in PDF "user space". 123// xTolerance - An x-axis tolerance value for character hit 124// detection, in point unit. 125// yTolerance - A y-axis tolerance value for character hit 126// detection, in point unit. 127// Return Value: 128// The zero-based index of the character at, or nearby the point (x,y). 129// If there is no character at or nearby the point, return value will 130// be -1. 131// If an error occurs, -3 will be returned. 132// 133DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, 134 double x, 135 double y, 136 double xTolerance, 137 double yTolerance); 138 139// Function: FPDFText_GetText 140// Extract unicode text string from the page. 141// Parameters: 142// text_page - Handle to a text page information structure. 143// Returned by FPDFText_LoadPage function. 144// start_index - Index for the start characters. 145// count - Number of characters to be extracted. 146// result - A buffer (allocated by application) receiving the 147// extracted unicodes. 148// The size of the buffer must be able to hold the 149// number of characters plus a terminator. 150// Return Value: 151// Number of characters written into the result buffer, including the 152// trailing terminator. 153// Comments: 154// This function ignores characters without unicode information. 
155// 156DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, 157 int start_index, 158 int count, 159 unsigned short* result); 160 161// Function: FPDFText_CountRects 162// Count number of rectangular areas occupied by a segment of texts. 163// Parameters: 164// text_page - Handle to a text page information structure. 165// Returned by FPDFText_LoadPage function. 166// start_index - Index for the start characters. 167// count - Number of characters. 168// Return value: 169// Number of rectangles. Zero for error. 170// Comments: 171// This function, along with FPDFText_GetRect can be used by 172// applications to detect the position 173// on the page for a text segment, so proper areas can be highlighted 174// or something. 175// FPDFTEXT will automatically merge small character boxes into bigger 176// one if those characters 177// are on the same line and use same font settings. 178// 179DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, 180 int start_index, 181 int count); 182 183// Function: FPDFText_GetRect 184// Get a rectangular area from the result generated by 185// FPDFText_CountRects. 186// Parameters: 187// text_page - Handle to a text page information structure. 188// Returned by FPDFText_LoadPage function. 189// rect_index - Zero-based index for the rectangle. 190// left - Pointer to a double value receiving the rectangle 191// left boundary. 192// top - Pointer to a double value receiving the rectangle 193// top boundary. 194// right - Pointer to a double value receiving the rectangle 195// right boundary. 196// bottom - Pointer to a double value receiving the rectangle 197// bottom boundary. 198// Return Value: 199// None. 200// 201DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, 202 int rect_index, 203 double* left, 204 double* top, 205 double* right, 206 double* bottom); 207 208// Function: FPDFText_GetBoundedText 209// Extract unicode text within a rectangular boundary on the page. 
210// Parameters: 211// text_page - Handle to a text page information structure. 212// Returned by FPDFText_LoadPage function. 213// left - Left boundary. 214// top - Top boundary. 215// right - Right boundary. 216// bottom - Bottom boundary. 217// buffer - A unicode buffer. 218// buflen - Number of characters (not bytes) for the buffer, 219// excluding an additional terminator. 220// Return Value: 221// If buffer is NULL or buflen is zero, return number of characters 222// (not bytes) of text present within 223// the rectangle, excluding a terminating NUL. Generally you should 224// pass a buffer at least one larger 225// than this if you want a terminating NUL, which will be provided if 226// space is available. 227// Otherwise, return number of characters copied into the buffer, 228// including the terminating NUL 229// when space for it is available. 230// Comment: 231// If the buffer is too small, as much text as will fit is copied into 232// it. 233// 234DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page, 235 double left, 236 double top, 237 double right, 238 double bottom, 239 unsigned short* buffer, 240 int buflen); 241 242// Flags used by FPDFText_FindStart function. 243#define FPDF_MATCHCASE \ 244 0x00000001 // If not set, it will not match case by default. 245#define FPDF_MATCHWHOLEWORD \ 246 0x00000002 // If not set, it will not match the whole word by default. 247 248// Function: FPDFText_FindStart 249// Start a search. 250// Parameters: 251// text_page - Handle to a text page information structure. 252// Returned by FPDFText_LoadPage function. 253// findwhat - A unicode match pattern. 254// flags - Option flags. 255// start_index - Start from this character. -1 for end of the page. 256// Return Value: 257// A handle for the search context. FPDFText_FindClose must be called 258// to release this handle. 
259// 260DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, 261 FPDF_WIDESTRING findwhat, 262 unsigned long flags, 263 int start_index); 264 265// Function: FPDFText_FindNext 266// Search in the direction from page start to end. 267// Parameters: 268// handle - A search context handle returned by 269// FPDFText_FindStart. 270// Return Value: 271// Whether a match is found. 272// 273DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle); 274 275// Function: FPDFText_FindPrev 276// Search in the direction from page end to start. 277// Parameters: 278// handle - A search context handle returned by 279// FPDFText_FindStart. 280// Return Value: 281// Whether a match is found. 282// 283DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle); 284 285// Function: FPDFText_GetSchResultIndex 286// Get the starting character index of the search result. 287// Parameters: 288// handle - A search context handle returned by 289// FPDFText_FindStart. 290// Return Value: 291// Index for the starting character. 292// 293DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle); 294 295// Function: FPDFText_GetSchCount 296// Get the number of matched characters in the search result. 297// Parameters: 298// handle - A search context handle returned by 299// FPDFText_FindStart. 300// Return Value: 301// Number of matched characters. 302// 303DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle); 304 305// Function: FPDFText_FindClose 306// Release a search context. 307// Parameters: 308// handle - A search context handle returned by 309// FPDFText_FindStart. 310// Return Value: 311// None. 312// 313DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); 314 315// Function: FPDFLink_LoadWebLinks 316// Prepare information about weblinks in a page. 317// Parameters: 318// text_page - Handle to a text page information structure. 319// Returned by FPDFText_LoadPage function. 
320// Return Value: 321// A handle to the page's links information structure. 322// NULL if something goes wrong. 323// Comments: 324// Weblinks are those links implicitly embedded in PDF pages. PDF also 325// has a type of 326// annotation called "link", FPDFTEXT doesn't deal with that kind of 327// link. 328// FPDFTEXT weblink feature is useful for automatically detecting links 329// in the page 330// contents. For example, things like "http://www.foxitsoftware.com" 331// will be detected, 332// so applications can allow user to click on those characters to 333// activate the link, 334// even the PDF doesn't come with link annotations. 335// 336// FPDFLink_CloseWebLinks must be called to release resources. 337// 338DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page); 339 340// Function: FPDFLink_CountWebLinks 341// Count number of detected web links. 342// Parameters: 343// link_page - Handle returned by FPDFLink_LoadWebLinks. 344// Return Value: 345// Number of detected web links. 346// 347DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); 348 349// Function: FPDFLink_GetURL 350// Fetch the URL information for a detected web link. 351// Parameters: 352// link_page - Handle returned by FPDFLink_LoadWebLinks. 353// link_index - Zero-based index for the link. 354// buffer - A unicode buffer. 355// buflen - Number of characters (not bytes) for the buffer, 356// including an additional terminator. 357// Return Value: 358// If buffer is NULL or buflen is zero, return number of characters 359// (not bytes and an additional terminator is also counted) needed, 360// otherwise, return number of characters copied into the buffer. 361// 362DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, 363 int link_index, 364 unsigned short* buffer, 365 int buflen); 366 367// Function: FPDFLink_CountRects 368// Count number of rectangular areas for the link. 
369// Parameters: 370// link_page - Handle returned by FPDFLink_LoadWebLinks. 371// link_index - Zero-based index for the link. 372// Return Value: 373// Number of rectangular areas for the link. 374// 375DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, 376 int link_index); 377 378// Function: FPDFLink_GetRect 379// Fetch the boundaries of a rectangle for a link. 380// Parameters: 381// link_page - Handle returned by FPDFLink_LoadWebLinks. 382// link_index - Zero-based index for the link. 383// rect_index - Zero-based index for a rectangle. 384// left - Pointer to a double value receiving the rectangle 385// left boundary. 386// top - Pointer to a double value receiving the rectangle 387// top boundary. 388// right - Pointer to a double value receiving the rectangle 389// right boundary. 390// bottom - Pointer to a double value receiving the rectangle 391// bottom boundary. 392// Return Value: 393// None. 394// 395DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, 396 int link_index, 397 int rect_index, 398 double* left, 399 double* top, 400 double* right, 401 double* bottom); 402 403// Function: FPDFLink_CloseWebLinks 404// Release resources used by weblink feature. 405// Parameters: 406// link_page - Handle returned by FPDFLink_LoadWebLinks. 407// Return Value: 408// None. 409// 410DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); 411 412#ifdef __cplusplus 413} 414#endif 415 416#endif // PUBLIC_FPDF_TEXT_H_ 417