// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7#ifndef _FPDFTEXT_H_
8#define _FPDFTEXT_H_
9
10#include "fpdfview.h"
11
12// Exported Functions
13#ifdef __cplusplus
14extern "C" {
15#endif
16
17// Function: FPDFText_LoadPage
18//			Prepare information about all characters in a page.
19// Parameters:
20//			page	-	Handle to the page. Returned by FPDF_LoadPage function (in FPDFVIEW module).
21// Return value:
22//			A handle to the text page information structure.
23//			NULL if something goes wrong.
24// Comments:
25//			Application must call FPDFText_ClosePage to release the text page information.
26//			If you don't purchase Text Module , this function will return NULL.
27//
28DLLEXPORT FPDF_TEXTPAGE	STDCALL FPDFText_LoadPage(FPDF_PAGE page);
29
30// Function: FPDFText_ClosePage
31//			Release all resources allocated for a text page information structure.
32// Parameters:
33//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
34// Return Value:
35//			None.
36//
37DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page);
38
39// Function: FPDFText_CountChars
40//			Get number of characters in a page.
41// Parameters:
42//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
43// Return value:
44//			Number of characters in the page. Return -1 for error.
45//			Generated characters, like additional space characters, new line characters, are also counted.
46// Comments:
47//			Characters in a page form a "stream", inside the stream, each character has an index.
48//			We will use the index parameters in many of FPDFTEXT functions. The first character in the page
49//			has an index value of zero.
50//
51DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page);
52
53// Function: FPDFText_GetUnicode
54//			Get Unicode of a character in a page.
55// Parameters:
56//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
57//			index		-	Zero-based index of the character.
58// Return value:
59//			The Unicode of the particular character.
60//			If a character is not encoded in Unicode and Foxit engine can't convert to Unicode,
61//			the return value will be zero.
62//
63DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index);
64
65// Function: FPDFText_GetFontSize
66//			Get the font size of a particular character.
67// Parameters:
68//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
69//			index		-	Zero-based index of the character.
70// Return value:
71//			The font size of the particular character, measured in points (about 1/72 inch).
72//			This is the typographic size of the font (so called "em size").
73//
74DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index);
75
76// Function: FPDFText_GetCharBox
77//			Get bounding box of a particular character.
78// Parameters:
79//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
80//			index		-	Zero-based index of the character.
81//			left		-	Pointer to a double number receiving left position of the character box.
82//			right		-	Pointer to a double number receiving right position of the character box.
83//			bottom		-	Pointer to a double number receiving bottom position of the character box.
84//			top			-	Pointer to a double number receiving top position of the character box.
85// Return Value:
86//			None.
87// Comments:
88//			All positions are measured in PDF "user space".
89//
90DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, double* left,
91													double* right, double* bottom, double* top);
92
93// Function: FPDFText_GetCharIndexAtPos
94//			Get the index of a character at or nearby a certain position on the page.
95// Parameters:
96//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
97//			x			-	X position in PDF "user space".
98//			y			-	Y position in PDF "user space".
99//			xTolerance	-	An x-axis tolerance value for character hit detection, in point unit.
100//			yTolerance	-	A y-axis tolerance value for character hit detection, in point unit.
101// Return Value:
102//			The zero-based index of the character at, or nearby the point (x,y).
103//			If there is no character at or nearby the point, return value will be -1.
104//			If an error occurs, -3 will be returned.
105//
106DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
107												 double x, double y, double xTorelance, double yTolerance);
108
109// Function: FPDFText_GetText
110//			Extract unicode text string from the page.
111// Parameters:
112//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
113//			start_index	-	Index for the start characters.
114//			count		-	Number of characters to be extracted.
115//			result		-	A buffer (allocated by application) receiving the extracted unicodes.
116//							The size of the buffer must be able to hold the number of characters plus a terminator.
117// Return Value:
118//			Number of characters written into the result buffer, including the trailing terminator.
119// Comments:
120//			This function ignores characters without unicode information.
121//
122DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result);
123
124// Function: FPDFText_CountRects
125//			Count number of rectangular areas occupied by a segment of texts.
126// Parameters:
127//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
128//			start_index	-	Index for the start characters.
129//			count		-	Number of characters.
130// Return value:
131//			Number of rectangles. Zero for error.
132// Comments:
133//			This function, along with FPDFText_GetRect can be used by applications to detect the position
134//			on the page for a text segment, so proper areas can be highlighted or something.
135//			FPDFTEXT will automatically merge small character boxes into bigger one if those characters
136//			are on the same line and use same font settings.
137//
138DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_index, int count);
139
140// Function: FPDFText_GetRect
141//			Get a rectangular area from the result generated by FPDFText_CountRects.
142// Parameters:
143//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
144//			rect_index	-	Zero-based index for the rectangle.
145//			left		-	Pointer to a double value receiving the rectangle left boundary.
146//			top			-	Pointer to a double value receiving the rectangle top boundary.
147//			right		-	Pointer to a double value receiving the rectangle right boundary.
148//			bottom		-	Pointer to a double value receiving the rectangle bottom boundary.
149// Return Value:
150//			None.
151//
152DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index, double* left, double* top,
153											double* right, double* bottom);
154
155// Function: FPDFText_GetBoundedText
156//			Extract unicode text within a rectangular boundary on the page.
157// Parameters:
158//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
159//			left		-	Left boundary.
160//			top			-	Top boundary.
161//			right		-	Right boundary.
162//			bottom		-	Bottom boundary.
163//			buffer		-	A unicode buffer.
164//			buflen		-	Number of characters (not bytes) for the buffer, excluding an additional terminator.
165// Return Value:
166//			If buffer is NULL or buflen is zero, return number of characters (not bytes) needed,
167//			otherwise, return number of characters copied into the buffer.
168//
169DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top,
170											  double right, double bottom,unsigned short* buffer,int buflen);
171
172
// Flags used by FPDFText_FindStart function.
#define FPDF_MATCHCASE      0x00000001		// Match case. If not set, case is not matched by default.
#define FPDF_MATCHWHOLEWORD 0x00000002		// Match whole word. If not set, the whole word is not matched by default.

177// Function: FPDFText_FindStart
178//			Start a search.
179// Parameters:
180//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
181//			findwhat	-	A unicode match pattern.
182//			flags		-	Option flags.
183//			start_index	-	Start from this character. -1 for end of the page.
184// Return Value:
185//			A handle for the search context. FPDFText_FindClose must be called to release this handle.
186//
187DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat,
188													unsigned long flags, int start_index);
189
190// Function: FPDFText_FindNext
191//			Search in the direction from page start to end.
192// Parameters:
193//			handle		-	A search context handle returned by FPDFText_FindStart.
194// Return Value:
195//			Whether a match is found.
196//
197DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle);
198
199// Function: FPDFText_FindPrev
200//			Search in the direction from page end to start.
201// Parameters:
202//			handle		-	A search context handle returned by FPDFText_FindStart.
203// Return Value:
204//			Whether a match is found.
205//
206DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle);
207
208// Function: FPDFText_GetSchResultIndex
209//			Get the starting character index of the search result.
210// Parameters:
211//			handle		-	A search context handle returned by FPDFText_FindStart.
212// Return Value:
213//			Index for the starting character.
214//
215DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle);
216
217// Function: FPDFText_GetSchCount
218//			Get the number of matched characters in the search result.
219// Parameters:
220//			handle		-	A search context handle returned by FPDFText_FindStart.
221// Return Value:
222//			Number of matched characters.
223//
224DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle);
225
226// Function: FPDFText_FindClose
227//			Release a search context.
228// Parameters:
229//			handle		-	A search context handle returned by FPDFText_FindStart.
230// Return Value:
231//			None.
232//
233DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle);
234
235// Function: FPDFLink_LoadWebLinks
236//			Prepare information about weblinks in a page.
237// Parameters:
238//			text_page	-	Handle to a text page information structure. Returned by FPDFText_LoadPage function.
239// Return Value:
240//			A handle to the page's links information structure.
241//			NULL if something goes wrong.
242// Comments:
243//			Weblinks are those links implicitly embedded in PDF pages. PDF also has a type of
244//			annotation called "link", FPDFTEXT doesn't deal with that kind of link.
245//			FPDFTEXT weblink feature is useful for automatically detecting links in the page
246//			contents. For example, things like "http://www.foxitsoftware.com" will be detected,
247//			so applications can allow user to click on those characters to activate the link,
248//			even the PDF doesn't come with link annotations.
249//
250//			FPDFLink_CloseWebLinks must be called to release resources.
251//
252DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page);
253
254// Function: FPDFLink_CountWebLinks
255//			Count number of detected web links.
256// Parameters:
257//			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
258// Return Value:
259//			Number of detected web links.
260//
261DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page);
262
263// Function: FPDFLink_GetURL
264//			Fetch the URL information for a detected web link.
265// Parameters:
266//			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
267//			link_index	-	Zero-based index for the link.
268//			buffer		-	A unicode buffer.
269//			buflen		-	Number of characters (not bytes) for the buffer, including an additional terminator.
270// Return Value:
271//			If buffer is NULL or buflen is zero, return number of characters (not bytes and an additional terminator is also counted) needed,
272//			otherwise, return number of characters copied into the buffer.
273//
274DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,int buflen);
275
276// Function: FPDFLink_CountRects
277//			Count number of rectangular areas for the link.
278// Parameters:
279//			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
280//			link_index	-	Zero-based index for the link.
281// Return Value:
282//			Number of rectangular areas for the link.
283//
284DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index);
285
286// Function: FPDFLink_GetRect
287//			Fetch the boundaries of a rectangle for a link.
288// Parameters:
289//			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
290//			link_index	-	Zero-based index for the link.
291//			rect_index	-	Zero-based index for a rectangle.
292//			left		-	Pointer to a double value receiving the rectangle left boundary.
293//			top			-	Pointer to a double value receiving the rectangle top boundary.
294//			right		-	Pointer to a double value receiving the rectangle right boundary.
295//			bottom		-	Pointer to a double value receiving the rectangle bottom boundary.
296// Return Value:
297//			None.
298//
299DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index,
300										double* left, double* top,double* right, double* bottom);
301
302// Function: FPDFLink_CloseWebLinks
303//			Release resources used by weblink feature.
304// Parameters:
305//			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
306// Return Value:
307//			None.
308//
309DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page);
310
311
312#ifdef __cplusplus
313};
314#endif
315
316#endif//_FPDFTEXT_H_
317