// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
// Include guard: everything up to the matching #endif at the end of this
// header is skipped when the header has already been included.
#ifndef _PDF_TEXT_INT_H_
#define _PDF_TEXT_INT_H_
9ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CPDF_TextParseOptions : public CFX_Object
10ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
11ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
12ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_TextParseOptions();
13ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			m_bCheckObjectOrder;
14ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL			m_bCheckDirection;
15ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int				m_nCheckSameObject;
16ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
// Forward declarations. CPDF_TextPage, CPDF_LinkExtract and CPDF_TextPageFind
// are defined further down in this header; CPDF_DocProgressiveSearch is only
// named here and is not defined in this file.
class CPDF_TextPage;
class CPDF_LinkExtract;
class CPDF_TextPageFind;
class CPDF_DocProgressiveSearch;
// Character-kind codes (FPDFTEXT_CHAR_*).
// NOTE(review): presumably the values stored in PAGECHAR_INFO::m_Flag --
// confirm against the implementation.
// The negative value is parenthesized so that the macro always expands as a
// single operand, whatever expression it is pasted into.
#define FPDFTEXT_CHAR_ERROR         (-1)
#define FPDFTEXT_CHAR_NORMAL        0
#define FPDFTEXT_CHAR_GENERATED     1
#define FPDFTEXT_CHAR_UNUNICODE     2
#define FPDFTEXT_CHAR_HYPHEN        3
#define FPDFTEXT_CHAR_PIECE         4

// Marked-content codes (FPDFTEXT_MC_*).
// NOTE(review): presumably the possible results of
// CPDF_TextPage::PreMarkedContent() -- confirm against the implementation.
#define FPDFTEXT_MC_PASS            0
#define FPDFTEXT_MC_DONE            1
#define FPDFTEXT_MC_DELAY           2
30ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovtypedef struct _PAGECHAR_INFO: public CFX_Object {
31ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int					m_CharCode;
32ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_WCHAR			m_Unicode;
33ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_FLOAT			m_OriginX;
34ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_FLOAT			m_OriginY;
35ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_INT32			m_Flag;
36ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_FloatRect		m_CharBox;
37ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_TextObject*	m_pTextObj;
38ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_AffineMatrix	m_Matrix;
39ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int					m_Index;
40ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov} PAGECHAR_INFO;
41ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovtypedef	CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray;
42ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovtypedef struct {
43ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int	m_Start;
44ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int m_nCount;
45ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov} FPDF_SEGMENT;
46ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovtypedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array;
47ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovtypedef struct {
48ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_TextObject*	m_pTextObj;
49ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_AffineMatrix	m_formMatrix;
50ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov} PDFTEXT_Obj;
51ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovtypedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ;
52ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CPDF_TextPage: public IPDF_TextPage
53ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
54ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
55ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_TextPage(const CPDF_Page* pPage, int flags = 0);
56ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0);
57ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
58ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_BOOL					ParseTextPage();
59ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual void					NormalizeObjects(FX_BOOL bNormalize);
60ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	FX_BOOL					IsParsered() const
61ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
62ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_IsParsered;
63ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
64ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual ~CPDF_TextPage() {};
65ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
66ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int CharIndexFromTextIndex(int TextIndex)const ;
67ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int TextIndexFromCharIndex(int CharIndex)const;
68ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int						CountChars() const;
69ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	void					GetCharInfo(int index, FPDF_CHAR_INFO & info) const;
70ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual void					GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const;
71ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int						GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const;
72ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int						GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance,
73ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            FX_FLOAT yTorelance) const;
74ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual CFX_WideString			GetTextByRect(CFX_FloatRect rect) const;
75ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual void					GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const;
76ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	int						GetOrderByDirection(int order, int direction) const;
77ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	CFX_WideString			GetPageText(int start = 0, int nCount = -1) const;
78ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
79ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int						CountRects(int start, int nCount);
80ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	void					GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top
81ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                                            , FX_FLOAT& right, FX_FLOAT &bottom) const;
82ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_BOOL					GetBaselineRotate(int rectIndex, int& Rotate);
83ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_BOOL					GetBaselineRotate(CFX_FloatRect rect, int& Rotate);
84ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	int						CountBoundedSegments(FX_FLOAT left, FX_FLOAT top,
85ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE);
86ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	void					GetBoundedSegment(int index, int& start, int& count) const;
87ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int						GetWordBreak(int index, int direction) const;
88ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
89ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    const	PAGECHAR_InfoArray*		GetCharList() const
90ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
91ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return &m_charList;
92ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
93ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    static	FX_BOOL					IsRectIntersect(CFX_FloatRect rect1, CFX_FloatRect rect2);
94ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    static	FX_BOOL					IsLetter(FX_WCHAR unicode);
95ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovprivate:
96ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							IsHyphen(FX_WCHAR curChar);
97ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							IsControlChar(PAGECHAR_INFO* pCharInfo);
98ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							GetBaselineRotate(int start, int end, int& Rotate);
99ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							ProcessObject();
100ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							ProcessFormObject(CPDF_FormObject*	pFormObj, CFX_AffineMatrix formMatrix);
101ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							ProcessTextObject(PDFTEXT_Obj pObj);
102ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							ProcessTextObject(CPDF_TextObject*	pTextObj, CFX_AffineMatrix formMatrix, FX_POSITION ObjPos);
103ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								ProcessInsertObject(const CPDF_TextObject* pObj, CFX_AffineMatrix formMatrix);
104ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
105ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos);
106ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2);
107ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const;
108ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							CloseTempLine();
109ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str);
110ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_INT32	PreMarkedContent(PDFTEXT_Obj pObj);
111ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void		ProcessMarkedContent(PDFTEXT_Obj pObj);
112ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void		CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const;
113ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void		FindPreviousTextObject(void);
114ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void		AddCharInfoByLRDirection(CFX_WideString& str, int i);
115ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void		AddCharInfoByRLDirection(CFX_WideString& str, int i);
116ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_INT32	GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
117ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_INT32	FindTextlineFlowDirection();
118ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovprotected:
119ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDFText_ParseOptions			m_ParseOptions;
120ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WordArray					m_CharIndex;
121ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    const CPDF_PageObjects*			m_pPage;
122ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    PAGECHAR_InfoArray				m_charList;
123ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WideTextBuf					m_TextBuf;
124ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    PAGECHAR_InfoArray				m_TempCharList;
125ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WideTextBuf					m_TempTextBuf;
126ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								m_parserflag;
127ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_TextObject*				m_pPreTextObj;
128ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_AffineMatrix				m_perMatrix;
129ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							m_IsParsered;
130ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_AffineMatrix				m_DisplayMatrix;
131ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
132ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    SEGMENT_Array					m_Segment;
133ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_RectArray					m_SelRects;
134ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    LINEOBJ							m_LineObj;
135ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							m_TextlineDir;
136ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_FloatRect					m_CurlineRect;
137ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
138ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CPDF_TextPageFind: public IPDF_TextPageFind
139ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
140ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
141ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
142ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual							~CPDF_TextPageFind() {};
143ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
144ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	FX_BOOL					FindFirst(CFX_WideString findwhat, int flags, int startPos = 0);
145ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	FX_BOOL					FindNext();
146ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	FX_BOOL					FindPrev();
147ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
148ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual void					GetRectArray(CFX_RectArray& rects) const;
149ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int						GetCurOrder() const;
150ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int						GetMatchedCount()const;
151ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovprotected:
152ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							ExtractFindWhat(CFX_WideString findwhat);
153ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							IsMatchWholeWord(CFX_WideString csPageText, int startPos, int endPos);
154ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString,
155ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            int iSubString, FX_WCHAR chSep);
156ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WideString					MakeReverse(const CFX_WideString str);
157ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								ReverseFind(CFX_WideString csPageText, CFX_WideString csWord, int nStartPos, int& WordLength);
158ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								GetCharIndex(int index) const;
159ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovprivate:
160ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WordArray					m_CharIndex;
161ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    const IPDF_TextPage*			m_pTextPage;
162ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WideString					m_strText;
163ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WideString					m_findWhat;
164ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								m_flags;
165ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WideStringArray				m_csFindWhatArray;
166ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								m_findNextStart;
167ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								m_findPreStart;
168ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							m_bMatchCase;
169ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							m_bMatchWholeWord;
170ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								m_resStart;
171ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								m_resEnd;
172ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_RectArray					m_resArray;
173ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							m_IsFind;
174ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
175ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CPDF_LinkExt: public CFX_Object
176ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
177ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
178ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_LinkExt() {};
179ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								m_Start;
180ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    int								m_Count;
181ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WideString					m_strUrl;
182ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual							~CPDF_LinkExt() {};
183ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
184ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovtypedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray;
185ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CPDF_LinkExtract: public IPDF_LinkExtract
186ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
187ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
188ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CPDF_LinkExtract();
189ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual							~CPDF_LinkExtract();
190ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual FX_BOOL					ExtractLinks(const IPDF_TextPage* pTextPage);
191ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	FX_BOOL					IsExtract() const
192ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    {
193ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return m_IsParserd;
194ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
195ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovpublic:
196ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual int						CountLinks() const;
197ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	CFX_WideString			GetURL(int index) const;
198ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	void					GetBoundedSegment(int index, int& start, int& count) const;
199ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    virtual	void					GetRects(int index, CFX_RectArray& rects)const;
200ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovprotected:
201ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							parserLink();
202ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    void							DeleteLinkList();
203ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							CheckWebLink(CFX_WideString& strBeCheck);
204ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							CheckMailLink(CFX_WideString& str);
205ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							AppendToLinkList(int start, int count, CFX_WideString strUrl);
206ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovprivate:
207ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    LINK_InfoArray					m_LinkList;
208ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    const CPDF_TextPage*			m_pTextPage;
209ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    CFX_WideString					m_strPageText;
210ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_BOOL							m_IsParserd;
211ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
// Free functions declared here and defined elsewhere.

// Takes one wide character and a destination buffer pointer and returns an
// FX_STRSIZE.
// NOTE(review): presumably the number of characters written to |pDst|; the
// required buffer size is not visible here -- confirm before calling.
FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst);
// Takes |str| by non-const reference, so the string may be modified in place.
void NormalizeString(CFX_WideString& str);
// Takes one wide character; |sDest| is passed by non-const reference.
void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest);
#endif  // _PDF_TEXT_INT_H_
216