1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef _FPDF_TEXT_H_
8#define _FPDF_TEXT_H_
9#ifndef _FPDF_PARSER_
10#include "../fpdfapi/fpdf_parser.h"
11#endif
12#ifndef _FPDF_PAGEOBJ_H_
13#include "../fpdfapi/fpdf_pageobj.h"
14#endif
15#ifndef _FPDF_PAGE_
16#include "../fpdfapi/fpdf_page.h"
17#endif
18class CPDF_PageObjects;
// Option bits for page-to-text conversion. Each constant is a distinct single
// bit, so they can be OR-ed together.
// NOTE(review): presumably these are the values accepted by the FX_DWORD
// `flags` parameter of the PDF_GetPageText* / PDF_GetTextStream_Unicode
// functions declared in this header -- confirm against the implementation.
#define PDF2TXT_AUTO_ROTATE         0x01
#define PDF2TXT_AUTO_WIDTH          0x02
#define PDF2TXT_KEEP_COLUMN         0x04
#define PDF2TXT_USE_OCR             0x08
#define PDF2TXT_INCLUDE_INVISIBLE   0x10
24void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
25                     int iMinWidth, FX_DWORD flags);
26void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
27                             int iMinWidth, FX_DWORD flags);
28void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
29                               FX_DWORD flags);
30CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage);
31class IPDF_TextPage;
32class IPDF_LinkExtract;
33class IPDF_TextPageFind;
// Character classification codes.
// NOTE(review): presumably the values stored in FPDF_CHAR_INFO::m_Flag --
// confirm against the implementation.
//
// CHAR_ERROR is parenthesized so that the macro always expands to a single
// primary expression: with a bare `-1`, an accidental `x CHAR_ERROR` would
// silently compile as the subtraction `x - 1`.
#define CHAR_ERROR          (-1)
#define CHAR_NORMAL         0
#define CHAR_GENERATED      1
#define CHAR_UNUNICODE      2
38typedef struct {
39    FX_WCHAR			m_Unicode;
40    FX_WCHAR			m_Charcode;
41    FX_INT32			m_Flag;
42    FX_FLOAT			m_FontSize;
43    FX_FLOAT			m_OriginX;
44    FX_FLOAT			m_OriginY;
45    CFX_FloatRect		m_CharBox;
46    CPDF_TextObject*	m_pTextObj;
47    CFX_AffineMatrix	m_Matrix;
48} FPDF_CHAR_INFO;
49typedef	CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray;
// Text flow codes.
// NOTE(review): going by the names, LRTB = left-to-right then top-to-bottom,
// RLTB = right-to-left then top-to-bottom, TBRL = top-to-bottom then
// right-to-left -- confirm. No user of these constants is visible in this
// header.
#define FPDFTEXT_LRTB       0
#define FPDFTEXT_RLTB       1
#define FPDFTEXT_TBRL       2
// Direction codes. Opposite directions are arithmetic negations of each other
// (LEFT == -RIGHT, UP == -DOWN).
// NOTE(review): presumably the values accepted by the `direction` parameter
// of IPDF_TextPage::GetOrderByDirection() / GetWordBreak() -- confirm.
//
// The negative values are parenthesized so that each macro expands to a single
// primary expression: with a bare `-1`, an accidental `x FPDFTEXT_LEFT` would
// silently compile as the subtraction `x - 1`.
#define FPDFTEXT_LEFT       (-1)
#define FPDFTEXT_RIGHT      1
#define FPDFTEXT_UP         (-2)
#define FPDFTEXT_DOWN       2
57class IPDF_ReflowedPage;
// Writing-mode codes. 0 is the "unknown" value; the remaining three carry the
// same LRTB / RLTB / TBRL suffixes as the FPDFTEXT_* flow codes, shifted up
// by one.
// The spelling "UNKNOW" is part of the public name and is kept as-is so that
// existing users keep compiling.
#define FPDFTEXT_WRITINGMODE_UNKNOW     0
#define FPDFTEXT_WRITINGMODE_LRTB       1
#define FPDFTEXT_WRITINGMODE_RLTB       2
#define FPDFTEXT_WRITINGMODE_TBRL       3
62class CPDFText_ParseOptions : public CFX_Object
63{
64public:
65
66    CPDFText_ParseOptions();
67    FX_BOOL			m_bGetCharCodeOnly;
68    FX_BOOL			m_bNormalizeObjs;
69    FX_BOOL			m_bOutputHyphen;
70};
71class IPDF_TextPage : public CFX_Object
72{
73public:
74
75    virtual ~IPDF_TextPage() {}
76    static IPDF_TextPage*	CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
77    static IPDF_TextPage*	CreateTextPage(const CPDF_Page* pPage, int flags = 0);
78    static IPDF_TextPage*	CreateTextPage(const CPDF_PageObjects* pObjs, int flags = 0);
79    static IPDF_TextPage*	CreateReflowTextPage(IPDF_ReflowedPage* pRefPage);
80
81    virtual void			NormalizeObjects(FX_BOOL bNormalize) = 0;
82
83    virtual FX_BOOL			ParseTextPage() = 0;
84
85
86    virtual FX_BOOL			IsParsered() const = 0;
87public:
88
89    virtual int CharIndexFromTextIndex(int TextIndex) const = 0;
90
91    virtual int TextIndexFromCharIndex(int CharIndex) const = 0;
92
93
94    virtual int				CountChars() const = 0;
95
96    virtual	void			GetCharInfo(int index, FPDF_CHAR_INFO & info) const = 0;
97
98    virtual void			GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const = 0;
99
100
101
102    virtual int				GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
103
104    virtual int				GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
105
106    virtual	int				GetOrderByDirection(int index, int direction) const = 0;
107
108    virtual CFX_WideString	GetTextByRect(CFX_FloatRect rect) const = 0;
109
110    virtual void			GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const = 0;
111
112
113    virtual int				CountRects(int start, int nCount) = 0;
114
115    virtual	void			GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const = 0;
116
117    virtual FX_BOOL			GetBaselineRotate(int rectIndex, int& Rotate) = 0;
118
119    virtual FX_BOOL			GetBaselineRotate(CFX_FloatRect rect, int& Rotate) = 0;
120
121    virtual	int				CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE) = 0;
122
123    virtual	void			GetBoundedSegment(int index, int& start, int& count) const = 0;
124
125
126    virtual int				GetWordBreak(int index, int direction) const = 0;
127
128    virtual CFX_WideString	GetPageText(int start = 0, int nCount = -1 ) const = 0;
129};
// Search option bits. Each constant is a distinct single bit, so they can be
// OR-ed together.
// NOTE(review): presumably the values accepted by the `flags` parameter of
// IPDF_TextPageFind::FindFirst() -- confirm.
#define FPDFTEXT_MATCHCASE          0x00000001
#define FPDFTEXT_MATCHWHOLEWORD     0x00000002
#define FPDFTEXT_CONSECUTIVE        0x00000004
133class IPDF_TextPageFind : public CFX_Object
134{
135public:
136
137    virtual	~IPDF_TextPageFind() {}
138
139    static	IPDF_TextPageFind*	CreatePageFind(const IPDF_TextPage* pTextPage);
140public:
141
142    virtual	FX_BOOL				FindFirst(CFX_WideString findwhat, int flags, int startPos = 0) = 0;
143
144    virtual	FX_BOOL				FindNext() = 0;
145
146    virtual	FX_BOOL				FindPrev() = 0;
147
148    virtual void				GetRectArray(CFX_RectArray& rects) const = 0;
149
150    virtual int					GetCurOrder() const = 0;
151
152    virtual int					GetMatchedCount() const = 0;
153};
154class IPDF_LinkExtract : public CFX_Object
155{
156public:
157
158    virtual	~IPDF_LinkExtract() {}
159
160    static	IPDF_LinkExtract*	CreateLinkExtract();
161
162    virtual FX_BOOL				ExtractLinks(const IPDF_TextPage* pTextPage) = 0;
163public:
164
165    virtual int					CountLinks() const = 0;
166
167    virtual CFX_WideString		GetURL(int index) const = 0;
168
169    virtual	void				GetBoundedSegment(int index, int& start, int& count) const = 0;
170
171    virtual void				GetRects(int index, CFX_RectArray& rects) const = 0;
172};
173#endif
174