// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
7e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_
8e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#define CORE_SRC_FPDFTEXT_TEXT_INT_H_
9e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
10ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "core/include/fpdftext/fpdf_text.h"
11ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "core/include/fxcrt/fx_basic.h"
12ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
// Forward declarations for classes that this header only names through
// pointers, or that are fully defined further down in this file.
class CFX_BidiChar;
class CPDF_DocProgressiveSearch;
class CPDF_FormObject;
class CPDF_LinkExtract;
class CPDF_TextPageFind;
18ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
// Character classification codes.
// NOTE(review): presumably the values stored in PAGECHAR_INFO::m_Flag --
// confirm against the implementation file.
#define FPDFTEXT_CHAR_ERROR -1
#define FPDFTEXT_CHAR_NORMAL 0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN 3
#define FPDFTEXT_CHAR_PIECE 4

// Marked-content status codes.
// NOTE(review): presumably the results of CPDF_TextPage::PreMarkedContent() --
// confirm against the implementation file.
#define FPDFTEXT_MC_PASS 0
#define FPDFTEXT_MC_DONE 1
#define FPDFTEXT_MC_DELAY 2
28ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
29e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef struct _PAGECHAR_INFO {
30ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_CharCode;
31ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_WCHAR m_Unicode;
32ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_FLOAT m_OriginX;
33ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_FLOAT m_OriginY;
34ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int32_t m_Flag;
35ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_FloatRect m_CharBox;
36ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CPDF_TextObject* m_pTextObj;
37ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_Matrix m_Matrix;
38ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_Index;
39e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} PAGECHAR_INFO;
40ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmanntypedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray;
// A run of consecutive characters described by a start index and a length.
typedef struct {
  int m_Start;   // Index of the first character in the run.
  int m_nCount;  // Number of characters in the run.
} FPDF_SEGMENT;
45e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array;
46e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef struct {
47ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CPDF_TextObject* m_pTextObj;
48ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_Matrix m_formMatrix;
49e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} PDFTEXT_Obj;
50e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ;
51ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
52ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_TextPage : public IPDF_TextPage {
53ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann public:
54ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CPDF_TextPage(const CPDF_Page* pPage, int flags);
55ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  ~CPDF_TextPage() override {}
56ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
57ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // IPDF_TextPage
58ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL ParseTextPage() override;
59ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void NormalizeObjects(FX_BOOL bNormalize) override;
60ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  bool IsParsed() const override { return m_bIsParsed; }
61ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int CharIndexFromTextIndex(int TextIndex) const override;
62ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int TextIndexFromCharIndex(int CharIndex) const override;
63ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int CountChars() const override;
64ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override;
65ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void GetRectArray(int start,
66ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    int nCount,
67ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    CFX_RectArray& rectArray) const override;
68ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int GetIndexAtPos(CPDF_Point point,
69ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    FX_FLOAT xTolerance,
70ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    FX_FLOAT yTolerance) const override;
71ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int GetIndexAtPos(FX_FLOAT x,
72ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    FX_FLOAT y,
73ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    FX_FLOAT xTolerance,
74ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    FX_FLOAT yTolerance) const override;
75ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override;
76ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void GetRectsArrayByRect(const CFX_FloatRect& rect,
77ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           CFX_RectArray& resRectArray) const override;
78ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideString GetPageText(int start = 0, int nCount = -1) const override;
79ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int CountRects(int start, int nCount) override;
80ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void GetRect(int rectIndex,
81ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               FX_FLOAT& left,
82ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               FX_FLOAT& top,
83ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               FX_FLOAT& right,
84ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann               FX_FLOAT& bottom) const override;
85ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override;
86ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override;
87ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int CountBoundedSegments(FX_FLOAT left,
88ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           FX_FLOAT top,
89ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           FX_FLOAT right,
90ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           FX_FLOAT bottom,
91ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           FX_BOOL bContains = FALSE) override;
92ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void GetBoundedSegment(int index, int& start, int& count) const override;
93ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int GetWordBreak(int index, int direction) const override;
94ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
95ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; }
96ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1,
97ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                                 const CFX_FloatRect& rect2);
98ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  static FX_BOOL IsLetter(FX_WCHAR unicode);
99ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
100ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann private:
101ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL IsHyphen(FX_WCHAR curChar);
102ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  bool IsControlChar(const PAGECHAR_INFO& charInfo);
103ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL GetBaselineRotate(int start, int end, int& Rotate);
104ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void ProcessObject();
105ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void ProcessFormObject(CPDF_FormObject* pFormObj,
106ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                         const CFX_Matrix& formMatrix);
107ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void ProcessTextObject(PDFTEXT_Obj pObj);
108ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void ProcessTextObject(CPDF_TextObject* pTextObj,
109ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                         const CFX_Matrix& formMatrix,
110ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                         FX_POSITION ObjPos);
111ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int ProcessInsertObject(const CPDF_TextObject* pObj,
112ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                          const CFX_Matrix& formMatrix);
113ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
114ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos);
115ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1,
116ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           CPDF_TextObject* pTextObj2);
117ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const;
118ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void CloseTempLine();
119ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str);
120ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int32_t PreMarkedContent(PDFTEXT_Obj pObj);
121ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void ProcessMarkedContent(PDFTEXT_Obj pObj);
122ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const;
123ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void FindPreviousTextObject(void);
124ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void AddCharInfoByLRDirection(CFX_WideString& str, int i);
125ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void AddCharInfoByRLDirection(CFX_WideString& str, int i);
126ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
127ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int32_t FindTextlineFlowDirection();
128ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
129ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend);
130ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
131ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                        const CPDF_Font* pFont,
132ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                        int nItems) const;
133ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
134ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CPDFText_ParseOptions m_ParseOptions;
135ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WordArray m_CharIndex;
136ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  const CPDF_PageObjects* const m_pPage;
137ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  PAGECHAR_InfoArray m_charList;
138ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideTextBuf m_TextBuf;
139ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  PAGECHAR_InfoArray m_TempCharList;
140ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideTextBuf m_TempTextBuf;
141ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  const int m_parserflag;
142ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CPDF_TextObject* m_pPreTextObj;
143ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_Matrix m_perMatrix;
144ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  bool m_bIsParsed;
145ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_Matrix m_DisplayMatrix;
146ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  SEGMENT_Array m_Segment;
147ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_RectArray m_SelRects;
148ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  LINEOBJ m_LineObj;
149ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int32_t m_TextlineDir;
150ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_FloatRect m_CurlineRect;
151e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov};
152ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
153ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_TextPageFind : public IPDF_TextPageFind {
154ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann public:
155ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
156ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  ~CPDF_TextPageFind() override {}
157ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
158ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // IPDF_TextPageFind
159ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL FindFirst(const CFX_WideString& findwhat,
160ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    int flags,
161ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                    int startPos = 0) override;
162ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL FindNext() override;
163ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL FindPrev() override;
164ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void GetRectArray(CFX_RectArray& rects) const override;
165ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int GetCurOrder() const override;
166ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int GetMatchedCount() const override;
167ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
168ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann protected:
169ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void ExtractFindWhat(const CFX_WideString& findwhat);
170ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText,
171ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           int startPos,
172ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           int endPos);
173ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL ExtractSubString(CFX_WideString& rString,
174ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           const FX_WCHAR* lpszFullString,
175ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           int iSubString,
176ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           FX_WCHAR chSep);
177ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideString MakeReverse(const CFX_WideString& str);
178ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int ReverseFind(const CFX_WideString& csPageText,
179ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                  const CFX_WideString& csWord,
180ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                  int nStartPos,
181ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                  int& WordLength);
182ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int GetCharIndex(int index) const;
183ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
184ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann private:
185ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WordArray m_CharIndex;
186ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  const IPDF_TextPage* m_pTextPage;
187ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideString m_strText;
188ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideString m_findWhat;
189ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_flags;
190ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideStringArray m_csFindWhatArray;
191ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_findNextStart;
192ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_findPreStart;
193ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL m_bMatchCase;
194ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL m_bMatchWholeWord;
195ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_resStart;
196ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_resEnd;
197ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_RectArray m_resArray;
198ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL m_IsFind;
199e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov};
200ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
201ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_LinkExt {
202ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann public:
203ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CPDF_LinkExt() {}
204ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_Start;
205ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int m_Count;
206ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideString m_strUrl;
207ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  virtual ~CPDF_LinkExt() {}
208e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov};
209ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
210e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray;
211ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
212ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_LinkExtract : public IPDF_LinkExtract {
213ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann public:
214ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CPDF_LinkExtract();
215ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  ~CPDF_LinkExtract() override;
216ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
217ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // IPDF_LinkExtract
218ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override;
219ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int CountLinks() const override;
220ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideString GetURL(int index) const override;
221ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void GetBoundedSegment(int index, int& start, int& count) const override;
222ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void GetRects(int index, CFX_RectArray& rects) const override;
223ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
224ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL IsExtract() const { return m_bIsParsed; }
225ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
226ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann protected:
227ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void ParseLink();
228ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void DeleteLinkList();
229ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_BOOL CheckWebLink(CFX_WideString& strBeCheck);
230ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  bool CheckMailLink(CFX_WideString& str);
231ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  void AppendToLinkList(int start, int count, const CFX_WideString& strUrl);
232ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
233ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann private:
234ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  LINK_InfoArray m_LinkList;
235ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  const CPDF_TextPage* m_pTextPage;
236ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  CFX_WideString m_strPageText;
237ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  bool m_bIsParsed;
238e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov};
239ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
240ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. MoltmannFX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst);
241e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid NormalizeString(CFX_WideString& str);
242e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest);
243ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid GetTextStream_Unicode(CFX_WideTextBuf& buffer,
244ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           CPDF_PageObjects* pPage,
245ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           FX_BOOL bUseLF,
246ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                           CFX_PtrArray* pObjArray);
247e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
248e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#endif  // CORE_SRC_FPDFTEXT_TEXT_INT_H_
249