1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#ifndef _FPDFTEXT_H_ 8#define _FPDFTEXT_H_ 9 10#include "fpdfview.h" 11 12// Exported Functions 13#ifdef __cplusplus 14extern "C" { 15#endif 16 17// Function: FPDFText_LoadPage 18// Prepare information about all characters in a page. 19// Parameters: 20// page - Handle to the page. Returned by FPDF_LoadPage function (in FPDFVIEW module). 21// Return value: 22// A handle to the text page information structure. 23// NULL if something goes wrong. 24// Comments: 25// Application must call FPDFText_ClosePage to release the text page information. 26// If you don't purchase Text Module , this function will return NULL. 27// 28DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); 29 30// Function: FPDFText_ClosePage 31// Release all resources allocated for a text page information structure. 32// Parameters: 33// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 34// Return Value: 35// None. 36// 37DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); 38 39// Function: FPDFText_CountChars 40// Get number of characters in a page. 41// Parameters: 42// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 43// Return value: 44// Number of characters in the page. Return -1 for error. 45// Generated characters, like additional space characters, new line characters, are also counted. 46// Comments: 47// Characters in a page form a "stream", inside the stream, each character has an index. 48// We will use the index parameters in many of FPDFTEXT functions. The first character in the page 49// has an index value of zero. 50// 51DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); 52 53// Function: FPDFText_GetUnicode 54// Get Unicode of a character in a page. 55// Parameters: 56// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 57// index - Zero-based index of the character. 58// Return value: 59// The Unicode of the particular character. 60// If a character is not encoded in Unicode and Foxit engine can't convert to Unicode, 61// the return value will be zero. 62// 63DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index); 64 65// Function: FPDFText_GetFontSize 66// Get the font size of a particular character. 67// Parameters: 68// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 69// index - Zero-based index of the character. 70// Return value: 71// The font size of the particular character, measured in points (about 1/72 inch). 72// This is the typographic size of the font (so called "em size"). 73// 74DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index); 75 76// Function: FPDFText_GetCharBox 77// Get bounding box of a particular character. 78// Parameters: 79// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 80// index - Zero-based index of the character. 81// left - Pointer to a double number receiving left position of the character box. 82// right - Pointer to a double number receiving right position of the character box. 83// bottom - Pointer to a double number receiving bottom position of the character box. 84// top - Pointer to a double number receiving top position of the character box. 85// Return Value: 86// None. 87// Comments: 88// All positions are measured in PDF "user space". 89// 90DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, double* left, 91 double* right, double* bottom, double* top); 92 93// Function: FPDFText_GetCharIndexAtPos 94// Get the index of a character at or nearby a certain position on the page. 95// Parameters: 96// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 97// x - X position in PDF "user space". 98// y - Y position in PDF "user space". 99// xTolerance - An x-axis tolerance value for character hit detection, in point unit. 100// yTolerance - A y-axis tolerance value for character hit detection, in point unit. 101// Return Value: 102// The zero-based index of the character at, or nearby the point (x,y). 103// If there is no character at or nearby the point, return value will be -1. 104// If an error occurs, -3 will be returned. 105// 106DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, 107 double x, double y, double xTorelance, double yTolerance); 108 109// Function: FPDFText_GetText 110// Extract unicode text string from the page. 111// Parameters: 112// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 113// start_index - Index for the start characters. 114// count - Number of characters to be extracted. 115// result - A buffer (allocated by application) receiving the extracted unicodes. 116// The size of the buffer must be able to hold the number of characters plus a terminator. 117// Return Value: 118// Number of characters written into the result buffer, including the trailing terminator. 119// Comments: 120// This function ignores characters without unicode information. 121// 122DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result); 123 124// Function: FPDFText_CountRects 125// Count number of rectangular areas occupied by a segment of texts. 126// Parameters: 127// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 128// start_index - Index for the start characters. 129// count - Number of characters. 130// Return value: 131// Number of rectangles. Zero for error. 132// Comments: 133// This function, along with FPDFText_GetRect can be used by applications to detect the position 134// on the page for a text segment, so proper areas can be highlighted or something. 135// FPDFTEXT will automatically merge small character boxes into bigger one if those characters 136// are on the same line and use same font settings. 137// 138DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_index, int count); 139 140// Function: FPDFText_GetRect 141// Get a rectangular area from the result generated by FPDFText_CountRects. 142// Parameters: 143// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 144// rect_index - Zero-based index for the rectangle. 145// left - Pointer to a double value receiving the rectangle left boundary. 146// top - Pointer to a double value receiving the rectangle top boundary. 147// right - Pointer to a double value receiving the rectangle right boundary. 148// bottom - Pointer to a double value receiving the rectangle bottom boundary. 149// Return Value: 150// None. 151// 152DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index, double* left, double* top, 153 double* right, double* bottom); 154 155// Function: FPDFText_GetBoundedText 156// Extract unicode text within a rectangular boundary on the page. 157// Parameters: 158// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 159// left - Left boundary. 160// top - Top boundary. 161// right - Right boundary. 162// bottom - Bottom boundary. 163// buffer - A unicode buffer. 164// buflen - Number of characters (not bytes) for the buffer, excluding an additional terminator. 165// Return Value: 166// If buffer is NULL or buflen is zero, return number of characters (not bytes) needed, 167// otherwise, return number of characters copied into the buffer. 168// 169DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top, 170 double right, double bottom,unsigned short* buffer,int buflen); 171 172 173// Flags used by FPDFText_FindStart function. 174#define FPDF_MATCHCASE 0x00000001 //If not set, it will not match case by default. 175#define FPDF_MATCHWHOLEWORD 0x00000002 //If not set, it will not match the whole word by default. 176 177// Function: FPDFText_FindStart 178// Start a search. 179// Parameters: 180// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 181// findwhat - A unicode match pattern. 182// flags - Option flags. 183// start_index - Start from this character. -1 for end of the page. 184// Return Value: 185// A handle for the search context. FPDFText_FindClose must be called to release this handle. 186// 187DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat, 188 unsigned long flags, int start_index); 189 190// Function: FPDFText_FindNext 191// Search in the direction from page start to end. 192// Parameters: 193// handle - A search context handle returned by FPDFText_FindStart. 194// Return Value: 195// Whether a match is found. 196// 197DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle); 198 199// Function: FPDFText_FindPrev 200// Search in the direction from page end to start. 201// Parameters: 202// handle - A search context handle returned by FPDFText_FindStart. 203// Return Value: 204// Whether a match is found. 205// 206DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle); 207 208// Function: FPDFText_GetSchResultIndex 209// Get the starting character index of the search result. 210// Parameters: 211// handle - A search context handle returned by FPDFText_FindStart. 212// Return Value: 213// Index for the starting character. 214// 215DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle); 216 217// Function: FPDFText_GetSchCount 218// Get the number of matched characters in the search result. 219// Parameters: 220// handle - A search context handle returned by FPDFText_FindStart. 221// Return Value: 222// Number of matched characters. 223// 224DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle); 225 226// Function: FPDFText_FindClose 227// Release a search context. 228// Parameters: 229// handle - A search context handle returned by FPDFText_FindStart. 230// Return Value: 231// None. 232// 233DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); 234 235// Function: FPDFLink_LoadWebLinks 236// Prepare information about weblinks in a page. 237// Parameters: 238// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 239// Return Value: 240// A handle to the page's links information structure. 241// NULL if something goes wrong. 242// Comments: 243// Weblinks are those links implicitly embedded in PDF pages. PDF also has a type of 244// annotation called "link", FPDFTEXT doesn't deal with that kind of link. 245// FPDFTEXT weblink feature is useful for automatically detecting links in the page 246// contents. For example, things like "http://www.foxitsoftware.com" will be detected, 247// so applications can allow user to click on those characters to activate the link, 248// even the PDF doesn't come with link annotations. 249// 250// FPDFLink_CloseWebLinks must be called to release resources. 251// 252DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page); 253 254// Function: FPDFLink_CountWebLinks 255// Count number of detected web links. 256// Parameters: 257// link_page - Handle returned by FPDFLink_LoadWebLinks. 258// Return Value: 259// Number of detected web links. 260// 261DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); 262 263// Function: FPDFLink_GetURL 264// Fetch the URL information for a detected web link. 265// Parameters: 266// link_page - Handle returned by FPDFLink_LoadWebLinks. 267// link_index - Zero-based index for the link. 268// buffer - A unicode buffer. 269// buflen - Number of characters (not bytes) for the buffer, including an additional terminator. 270// Return Value: 271// If buffer is NULL or buflen is zero, return number of characters (not bytes and an additional terminator is also counted) needed, 272// otherwise, return number of characters copied into the buffer. 273// 274DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,int buflen); 275 276// Function: FPDFLink_CountRects 277// Count number of rectangular areas for the link. 278// Parameters: 279// link_page - Handle returned by FPDFLink_LoadWebLinks. 280// link_index - Zero-based index for the link. 281// Return Value: 282// Number of rectangular areas for the link. 283// 284DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index); 285 286// Function: FPDFLink_GetRect 287// Fetch the boundaries of a rectangle for a link. 288// Parameters: 289// link_page - Handle returned by FPDFLink_LoadWebLinks. 290// link_index - Zero-based index for the link. 291// rect_index - Zero-based index for a rectangle. 292// left - Pointer to a double value receiving the rectangle left boundary. 293// top - Pointer to a double value receiving the rectangle top boundary. 294// right - Pointer to a double value receiving the rectangle right boundary. 295// bottom - Pointer to a double value receiving the rectangle bottom boundary. 296// Return Value: 297// None. 298// 299DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index, 300 double* left, double* top,double* right, double* bottom); 301 302// Function: FPDFLink_CloseWebLinks 303// Release resources used by weblink feature. 304// Parameters: 305// link_page - Handle returned by FPDFLink_LoadWebLinks. 306// Return Value: 307// None. 308// 309DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); 310 311 312#ifdef __cplusplus 313}; 314#endif 315 316#endif//_FPDFTEXT_H_ 317