1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_
8#define CORE_SRC_FPDFTEXT_TEXT_INT_H_
9
10class CPDF_TextParseOptions
11{
12public:
13    CPDF_TextParseOptions();
14    FX_BOOL			m_bCheckObjectOrder;
15    FX_BOOL			m_bCheckDirection;
16    int				m_nCheckSameObject;
17};
// Forward declarations for the classes of the text module. Three of them are
// defined further down in this header; CPDF_DocProgressiveSearch is only
// declared here.
class CPDF_TextPage;
class CPDF_LinkExtract;
class CPDF_TextPageFind;
class CPDF_DocProgressiveSearch;

// Character kind codes.
// NOTE(review): presumably the values carried by PAGECHAR_INFO::m_Flag --
// confirm against the implementation.
// The negative value is parenthesized so that the macro always expands to a
// single primary expression, whatever operators surround the use site.
#define FPDFTEXT_CHAR_ERROR         (-1)
#define FPDFTEXT_CHAR_NORMAL        0
#define FPDFTEXT_CHAR_GENERATED     1
#define FPDFTEXT_CHAR_UNUNICODE     2
#define FPDFTEXT_CHAR_HYPHEN        3
#define FPDFTEXT_CHAR_PIECE         4

// Marked-content result codes.
// NOTE(review): presumably the values returned by
// CPDF_TextPage::PreMarkedContent -- confirm against the implementation.
#define FPDFTEXT_MC_PASS            0
#define FPDFTEXT_MC_DONE            1
#define FPDFTEXT_MC_DELAY           2
31typedef struct _PAGECHAR_INFO {
32    int					m_CharCode;
33    FX_WCHAR			m_Unicode;
34    FX_FLOAT			m_OriginX;
35    FX_FLOAT			m_OriginY;
36    FX_INT32			m_Flag;
37    CFX_FloatRect		m_CharBox;
38    CPDF_TextObject*	m_pTextObj;
39    CFX_AffineMatrix	m_Matrix;
40    int					m_Index;
41} PAGECHAR_INFO;
42typedef	CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray;
43typedef struct {
44    int	m_Start;
45    int m_nCount;
46} FPDF_SEGMENT;
47typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array;
48typedef struct {
49    CPDF_TextObject*	m_pTextObj;
50    CFX_AffineMatrix	m_formMatrix;
51} PDFTEXT_Obj;
52typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ;
53class CPDF_TextPage: public IPDF_TextPage
54{
55public:
56    CPDF_TextPage(const CPDF_Page* pPage, int flags = 0);
57    CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0);
58    CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
59    virtual FX_BOOL					ParseTextPage();
60    virtual void					NormalizeObjects(FX_BOOL bNormalize);
61    virtual	FX_BOOL					IsParsered() const
62    {
63        return m_IsParsered;
64    }
65    virtual ~CPDF_TextPage() {};
66public:
67    virtual int CharIndexFromTextIndex(int TextIndex)const ;
68    virtual int TextIndexFromCharIndex(int CharIndex)const;
69    virtual int						CountChars() const;
70    virtual	void					GetCharInfo(int index, FPDF_CHAR_INFO & info) const;
71    virtual void					GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const;
72    virtual int						GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const;
73    virtual int						GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance,
74            FX_FLOAT yTorelance) const;
75    virtual CFX_WideString			GetTextByRect(const CFX_FloatRect& rect) const;
76    virtual void					GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const;
77    virtual	int						GetOrderByDirection(int order, int direction) const;
78    virtual	CFX_WideString			GetPageText(int start = 0, int nCount = -1) const;
79
80    virtual int						CountRects(int start, int nCount);
81    virtual	void					GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top
82                                            , FX_FLOAT& right, FX_FLOAT &bottom) const;
83    virtual FX_BOOL					GetBaselineRotate(int rectIndex, int& Rotate);
84    virtual FX_BOOL					GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate);
85    virtual	int						CountBoundedSegments(FX_FLOAT left, FX_FLOAT top,
86            FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE);
87    virtual	void					GetBoundedSegment(int index, int& start, int& count) const;
88    virtual int						GetWordBreak(int index, int direction) const;
89public:
90    const	PAGECHAR_InfoArray*		GetCharList() const
91    {
92        return &m_charList;
93    }
94    static	FX_BOOL					IsRectIntersect(const CFX_FloatRect& rect1, const CFX_FloatRect& rect2);
95    static	FX_BOOL					IsLetter(FX_WCHAR unicode);
96private:
97    FX_BOOL							IsHyphen(FX_WCHAR curChar);
98    FX_BOOL							IsControlChar(PAGECHAR_INFO* pCharInfo);
99    FX_BOOL							GetBaselineRotate(int start, int end, int& Rotate);
100    void							ProcessObject();
101    void							ProcessFormObject(CPDF_FormObject*	pFormObj, const CFX_AffineMatrix& formMatrix);
102    void							ProcessTextObject(PDFTEXT_Obj pObj);
103    void							ProcessTextObject(CPDF_TextObject*	pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITION ObjPos);
104    int								ProcessInsertObject(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix);
105    FX_BOOL							GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
106    FX_BOOL							IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos);
107    FX_BOOL							IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2);
108    int								GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const;
109    void							CloseTempLine();
110    void							OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str);
111    FX_INT32	PreMarkedContent(PDFTEXT_Obj pObj);
112    void		ProcessMarkedContent(PDFTEXT_Obj pObj);
113    void		CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const;
114    void		FindPreviousTextObject(void);
115    void		AddCharInfoByLRDirection(CFX_WideString& str, int i);
116    void		AddCharInfoByRLDirection(CFX_WideString& str, int i);
117    FX_INT32	GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
118    FX_INT32	FindTextlineFlowDirection();
119    void SwapTempTextBuf(FX_INT32 iCharListStartAppend,
120                         FX_INT32 iBufStartAppend);
121    FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
122                          const CPDF_Font* pFont,
123                          int nItems) const;
124protected:
125    CPDFText_ParseOptions			m_ParseOptions;
126    CFX_WordArray					m_CharIndex;
127    const CPDF_PageObjects*			m_pPage;
128    PAGECHAR_InfoArray				m_charList;
129    CFX_WideTextBuf					m_TextBuf;
130    PAGECHAR_InfoArray				m_TempCharList;
131    CFX_WideTextBuf					m_TempTextBuf;
132    int								m_parserflag;
133    CPDF_TextObject*				m_pPreTextObj;
134    CFX_AffineMatrix				m_perMatrix;
135    FX_BOOL							m_IsParsered;
136    CFX_AffineMatrix				m_DisplayMatrix;
137
138    SEGMENT_Array					m_Segment;
139    CFX_RectArray					m_SelRects;
140    LINEOBJ							m_LineObj;
141    FX_BOOL							m_TextlineDir;
142    CFX_FloatRect					m_CurlineRect;
143};
144class CPDF_TextPageFind: public IPDF_TextPageFind
145{
146public:
147    CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
148    virtual							~CPDF_TextPageFind() {};
149public:
150    virtual	FX_BOOL					FindFirst(const CFX_WideString& findwhat, int flags, int startPos = 0);
151    virtual	FX_BOOL					FindNext();
152    virtual	FX_BOOL					FindPrev();
153
154    virtual void					GetRectArray(CFX_RectArray& rects) const;
155    virtual int						GetCurOrder() const;
156    virtual int						GetMatchedCount()const;
157protected:
158    void							ExtractFindWhat(const CFX_WideString& findwhat);
159    FX_BOOL							IsMatchWholeWord(const CFX_WideString& csPageText, int startPos, int endPos);
160    FX_BOOL							ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString,
161            int iSubString, FX_WCHAR chSep);
162    CFX_WideString					MakeReverse(const CFX_WideString& str);
163    int								ReverseFind(const CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int& WordLength);
164    int								GetCharIndex(int index) const;
165private:
166    CFX_WordArray					m_CharIndex;
167    const IPDF_TextPage*			m_pTextPage;
168    CFX_WideString					m_strText;
169    CFX_WideString					m_findWhat;
170    int								m_flags;
171    CFX_WideStringArray				m_csFindWhatArray;
172    int								m_findNextStart;
173    int								m_findPreStart;
174    FX_BOOL							m_bMatchCase;
175    FX_BOOL							m_bMatchWholeWord;
176    int								m_resStart;
177    int								m_resEnd;
178    CFX_RectArray					m_resArray;
179    FX_BOOL							m_IsFind;
180};
181class CPDF_LinkExt
182{
183public:
184    CPDF_LinkExt() {};
185    int								m_Start;
186    int								m_Count;
187    CFX_WideString					m_strUrl;
188    virtual							~CPDF_LinkExt() {};
189};
190typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray;
191class CPDF_LinkExtract: public IPDF_LinkExtract
192{
193public:
194    CPDF_LinkExtract();
195    virtual							~CPDF_LinkExtract();
196    virtual FX_BOOL					ExtractLinks(const IPDF_TextPage* pTextPage);
197    virtual	FX_BOOL					IsExtract() const
198    {
199        return m_IsParserd;
200    }
201public:
202    virtual int						CountLinks() const;
203    virtual	CFX_WideString			GetURL(int index) const;
204    virtual	void					GetBoundedSegment(int index, int& start, int& count) const;
205    virtual	void					GetRects(int index, CFX_RectArray& rects)const;
206protected:
207    void							parserLink();
208    void							DeleteLinkList();
209    FX_BOOL							CheckWebLink(CFX_WideString& strBeCheck);
210    FX_BOOL							CheckMailLink(CFX_WideString& str);
211    FX_BOOL							AppendToLinkList(int start, int count, const CFX_WideString& strUrl);
212private:
213    LINK_InfoArray					m_LinkList;
214    const CPDF_TextPage*			m_pTextPage;
215    CFX_WideString					m_strPageText;
216    FX_BOOL							m_IsParserd;
217};
218FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst);
219void NormalizeString(CFX_WideString& str);
220void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest);
221
222#endif  // CORE_SRC_FPDFTEXT_TEXT_INT_H_
223