1e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Copyright 2014 PDFium Authors. All rights reserved. 2e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Use of this source code is governed by a BSD-style license that can be 3e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// found in the LICENSE file. 4ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 5e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 7e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ 8e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#define CORE_SRC_FPDFTEXT_TEXT_INT_H_ 9e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 10ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "core/include/fpdftext/fpdf_text.h" 11ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "core/include/fxcrt/fx_basic.h" 12ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 13ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CFX_BidiChar; 14ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_DocProgressiveSearch; 15ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_FormObject; 16e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovclass CPDF_LinkExtract; 17e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovclass CPDF_TextPageFind; 18ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 19ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_CHAR_ERROR -1 20ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_CHAR_NORMAL 0 21ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_CHAR_GENERATED 1 22ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_CHAR_UNUNICODE 2 23ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_CHAR_HYPHEN 3 24ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_CHAR_PIECE 4 25ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_MC_PASS 0 26ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_MC_DONE 1 27ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#define FPDFTEXT_MC_DELAY 2 28ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 29e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef struct _PAGECHAR_INFO { 30ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_CharCode; 31ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_WCHAR m_Unicode; 32ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT m_OriginX; 33ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT m_OriginY; 34ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int32_t m_Flag; 35ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_FloatRect m_CharBox; 36ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDF_TextObject* m_pTextObj; 37ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_Matrix m_Matrix; 38ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_Index; 39e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} PAGECHAR_INFO; 40ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmanntypedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray; 41e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef struct { 42ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_Start; 43ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_nCount; 44e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} FPDF_SEGMENT; 45e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; 46e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef struct { 47ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDF_TextObject* m_pTextObj; 48ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_Matrix m_formMatrix; 49e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} PDFTEXT_Obj; 50e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; 51ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 52ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_TextPage : public IPDF_TextPage { 53ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann public: 54ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDF_TextPage(const CPDF_Page* pPage, int flags); 55ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ~CPDF_TextPage() override {} 56ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 57ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann // IPDF_TextPage 58ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL ParseTextPage() override; 59ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void NormalizeObjects(FX_BOOL bNormalize) override; 60ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool IsParsed() const override { return m_bIsParsed; } 61ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int CharIndexFromTextIndex(int TextIndex) const override; 62ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int TextIndexFromCharIndex(int CharIndex) const override; 63ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int CountChars() const override; 64ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override; 65ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void GetRectArray(int start, 66ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int nCount, 67ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_RectArray& rectArray) const override; 68ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int GetIndexAtPos(CPDF_Point point, 69ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT xTolerance, 70ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT yTolerance) const override; 71ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int GetIndexAtPos(FX_FLOAT x, 72ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT y, 73ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT xTolerance, 74ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT yTolerance) const override; 75ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override; 76ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void GetRectsArrayByRect(const CFX_FloatRect& rect, 77ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_RectArray& resRectArray) const override; 78ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString GetPageText(int start = 0, int nCount = -1) const override; 79ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int CountRects(int start, int nCount) override; 80ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void GetRect(int rectIndex, 81ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT& left, 82ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT& top, 83ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT& right, 84ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT& bottom) const override; 85ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; 86ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; 87ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int CountBoundedSegments(FX_FLOAT left, 88ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT top, 89ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT right, 90ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_FLOAT bottom, 91ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL bContains = FALSE) override; 92ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void GetBoundedSegment(int index, int& start, int& count) const override; 93ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int GetWordBreak(int index, int direction) const override; 94ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 95ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } 96ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, 97ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const CFX_FloatRect& rect2); 98ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann static FX_BOOL IsLetter(FX_WCHAR unicode); 99ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 100ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann private: 101ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL IsHyphen(FX_WCHAR curChar); 102ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool IsControlChar(const PAGECHAR_INFO& charInfo); 103ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); 104ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void ProcessObject(); 105ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void ProcessFormObject(CPDF_FormObject* pFormObj, 106ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const CFX_Matrix& formMatrix); 107ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void ProcessTextObject(PDFTEXT_Obj pObj); 108ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void ProcessTextObject(CPDF_TextObject* pTextObj, 109ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const CFX_Matrix& formMatrix, 110ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_POSITION ObjPos); 111ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int ProcessInsertObject(const CPDF_TextObject* pObj, 112ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const CFX_Matrix& formMatrix); 113ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); 114ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); 115ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, 116ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDF_TextObject* pTextObj2); 117ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const; 118ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void CloseTempLine(); 119ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str); 120ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int32_t PreMarkedContent(PDFTEXT_Obj pObj); 121ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void ProcessMarkedContent(PDFTEXT_Obj pObj); 122ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; 123ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void FindPreviousTextObject(void); 124ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void AddCharInfoByLRDirection(CFX_WideString& str, int i); 125ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void AddCharInfoByRLDirection(CFX_WideString& str, int i); 126ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); 127ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int32_t FindTextlineFlowDirection(); 128ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 129ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); 130ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, 131ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const CPDF_Font* pFont, 132ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int nItems) const; 133ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 134ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDFText_ParseOptions m_ParseOptions; 135ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WordArray m_CharIndex; 136ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const CPDF_PageObjects* const m_pPage; 137ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann PAGECHAR_InfoArray m_charList; 138ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideTextBuf m_TextBuf; 139ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann PAGECHAR_InfoArray m_TempCharList; 140ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideTextBuf m_TempTextBuf; 141ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const int m_parserflag; 142ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDF_TextObject* m_pPreTextObj; 143ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_Matrix m_perMatrix; 144ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool m_bIsParsed; 145ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_Matrix m_DisplayMatrix; 146ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann SEGMENT_Array m_Segment; 147ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_RectArray m_SelRects; 148ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann LINEOBJ m_LineObj; 149ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int32_t m_TextlineDir; 150ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_FloatRect m_CurlineRect; 151e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}; 152ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 153ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_TextPageFind : public IPDF_TextPageFind { 154ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann public: 155ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); 156ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ~CPDF_TextPageFind() override {} 157ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 158ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann // IPDF_TextPageFind 159ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL FindFirst(const CFX_WideString& findwhat, 160ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int flags, 161ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int startPos = 0) override; 162ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL FindNext() override; 163ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL FindPrev() override; 164ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void GetRectArray(CFX_RectArray& rects) const override; 165ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int GetCurOrder() const override; 166ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int GetMatchedCount() const override; 167ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 168ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann protected: 169ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void ExtractFindWhat(const CFX_WideString& findwhat); 170ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, 171ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int startPos, 172ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int endPos); 173ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL ExtractSubString(CFX_WideString& rString, 174ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const FX_WCHAR* lpszFullString, 175ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int iSubString, 176ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_WCHAR chSep); 177ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString MakeReverse(const CFX_WideString& str); 178ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int ReverseFind(const CFX_WideString& csPageText, 179ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const CFX_WideString& csWord, 180ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int nStartPos, 181ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int& WordLength); 182ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int GetCharIndex(int index) const; 183ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 184ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann private: 185ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WordArray m_CharIndex; 186ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const IPDF_TextPage* m_pTextPage; 187ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString m_strText; 188ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString m_findWhat; 189ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_flags; 190ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideStringArray m_csFindWhatArray; 191ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_findNextStart; 192ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_findPreStart; 193ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL m_bMatchCase; 194ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL m_bMatchWholeWord; 195ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_resStart; 196ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_resEnd; 197ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_RectArray m_resArray; 198ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL m_IsFind; 199e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}; 200ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 201ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_LinkExt { 202ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann public: 203ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDF_LinkExt() {} 204ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_Start; 205ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int m_Count; 206ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString m_strUrl; 207ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann virtual ~CPDF_LinkExt() {} 208e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}; 209ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 210e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovtypedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; 211ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 212ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannclass CPDF_LinkExtract : public IPDF_LinkExtract { 213ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann public: 214ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDF_LinkExtract(); 215ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann ~CPDF_LinkExtract() override; 216ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 217ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann // IPDF_LinkExtract 218ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override; 219ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann int CountLinks() const override; 220ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString GetURL(int index) const override; 221ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void GetBoundedSegment(int index, int& start, int& count) const override; 222ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void GetRects(int index, CFX_RectArray& rects) const override; 223ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 224ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL IsExtract() const { return m_bIsParsed; } 225ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 226ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann protected: 227ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void ParseLink(); 228ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void DeleteLinkList(); 229ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); 230ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool CheckMailLink(CFX_WideString& str); 231ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann void AppendToLinkList(int start, int count, const CFX_WideString& strUrl); 232ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 233ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann private: 234ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann LINK_InfoArray m_LinkList; 235ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const CPDF_TextPage* m_pTextPage; 236ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString m_strPageText; 237ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann bool m_bIsParsed; 238e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}; 239ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 240ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. MoltmannFX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); 241e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid NormalizeString(CFX_WideString& str); 242e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); 243ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannvoid GetTextStream_Unicode(CFX_WideTextBuf& buffer, 244ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CPDF_PageObjects* pPage, 245ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_BOOL bUseLF, 246ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_PtrArray* pObjArray); 247e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 248e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ 249