1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
8#define CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
9
10#include "core/include/fpdfapi/fpdf_parser.h"
11
// Forward declarations. The declarations below only name these types through
// pointers (or define them later in this header), so their full definitions
// are not required here.
// NOTE(review): CPDF_PageObjects is not referenced anywhere else in this
// header -- presumably kept for files that include it; confirm before removing.
class CPDF_Page;
class CPDF_PageObjects;
class CPDF_TextObject;
class IPDF_LinkExtract;
class IPDF_ReflowedPage;
class IPDF_TextPage;
class IPDF_TextPageFind;
19
// Option bits for PDF-to-text conversion. Every value is a distinct power of
// two, so several options can be OR-ed together into one flags word.
// NOTE(review): presumably these are what the `flags` parameter of the
// PDF_GetPageText* functions in this header expects -- confirm at call sites.
#define PDF2TXT_AUTO_ROTATE 0x01
#define PDF2TXT_AUTO_WIDTH 0x02
#define PDF2TXT_KEEP_COLUMN 0x04
#define PDF2TXT_USE_OCR 0x08
#define PDF2TXT_INCLUDE_INVISIBLE 0x10
// Free-function text extraction entry points. Only the declarations live in
// this header; the definitions are elsewhere.
//
// Common parameters:
//   pDoc   - the document the page belongs to.
//   pPage  - the page, given as its dictionary object.
//   flags  - NOTE(review): presumably a combination of the PDF2TXT_* bits
//            defined above -- confirm in the implementation.

// Takes |lines| by non-const reference and returns void, so |lines| is the
// result channel. NOTE(review): presumably one byte-string entry per extracted
// line; the meaning of |iMinWidth| is not visible from this header.
void PDF_GetPageText(CFX_ByteStringArray& lines,
                     CPDF_Document* pDoc,
                     CPDF_Dictionary* pPage,
                     int iMinWidth,
                     FX_DWORD flags);
// Same signature shape as PDF_GetPageText(), but the result array holds wide
// strings instead of byte strings.
void PDF_GetPageText_Unicode(CFX_WideStringArray& lines,
                             CPDF_Document* pDoc,
                             CPDF_Dictionary* pPage,
                             int iMinWidth,
                             FX_DWORD flags);
// Writes into a caller-supplied wide text buffer rather than an array of
// lines; note there is no |iMinWidth| parameter on this variant.
void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer,
                               CPDF_Document* pDoc,
                               CPDF_Dictionary* pPage,
                               FX_DWORD flags);
// Returns a wide string by value. NOTE(review): by its name this is the first
// line of text on the page; what is returned for a page without text is not
// visible here.
CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc,
                                            CPDF_Dictionary* pPage);
// Character classification values.
// NOTE(review): presumably the values stored in FPDF_CHAR_INFO::m_Flag --
// confirm against the code that fills that struct.
//
// The negative value is parenthesized so the unary minus stays attached to
// the literal wherever the macro is expanded inside a larger expression.
#define CHAR_ERROR (-1)
#define CHAR_NORMAL 0
#define CHAR_GENERATED 1
#define CHAR_UNUNICODE 2
45
// Per-character record. IPDF_TextPage::GetCharInfo() receives a pointer to
// one of these as its output argument.
// The struct declares no constructor and no member initializers, so a
// default-initialized local instance has indeterminate scalar members until
// something assigns them.
struct FPDF_CHAR_INFO {
  // NOTE(review): presumably the Unicode value of the character -- confirm.
  FX_WCHAR m_Unicode;
  // NOTE(review): presumably the font-specific character code -- confirm.
  FX_WCHAR m_Charcode;
  // NOTE(review): presumably one of the CHAR_* constants defined above.
  int32_t m_Flag;
  FX_FLOAT m_FontSize;
  // NOTE(review): origin coordinates; the coordinate space (page vs. text
  // object) is not visible from this header.
  FX_FLOAT m_OriginX;
  FX_FLOAT m_OriginY;
  // Bounding rectangle of the character.
  CFX_FloatRect m_CharBox;
  // Raw, non-const pointer to a text object. NOTE(review): presumably the
  // object the character came from, not owned by this struct -- confirm.
  CPDF_TextObject* m_pTextObj;
  CFX_Matrix m_Matrix;
};
57
// Array of float rectangles; the interfaces below pass it by reference to
// hand back lists of rectangles.
typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray;
// Text layout orders.
// NOTE(review): by their names, left-to-right/top-to-bottom,
// right-to-left/top-to-bottom and top-to-bottom/right-to-left -- confirm.
#define FPDFTEXT_LRTB 0
#define FPDFTEXT_RLTB 1
#define FPDFTEXT_TBRL 2

// Direction codes. Opposite directions are each other's negation
// (LEFT == -RIGHT, UP == -DOWN). Negative values are parenthesized so the
// unary minus stays attached to the literal wherever the macro is expanded.
// NOTE(review): presumably the `direction` argument of
// IPDF_TextPage::GetWordBreak() -- confirm.
#define FPDFTEXT_LEFT (-1)
#define FPDFTEXT_RIGHT 1
#define FPDFTEXT_UP (-2)
#define FPDFTEXT_DOWN 2

// Writing modes. "UNKNOW" (sic) is the existing public spelling and is kept
// as-is so code that already uses the name keeps compiling.
#define FPDFTEXT_WRITINGMODE_UNKNOW 0
#define FPDFTEXT_WRITINGMODE_LRTB 1
#define FPDFTEXT_WRITINGMODE_RLTB 2
#define FPDFTEXT_WRITINGMODE_TBRL 3
// Bundle of boolean switches for text parsing. All members are public.
// The constructor is only declared here; the initial value it gives each
// switch is defined elsewhere and is not visible from this header.
class CPDFText_ParseOptions {
 public:
  CPDFText_ParseOptions();
  // NOTE(review): presumably restricts extraction to character codes without
  // Unicode mapping -- confirm in the parser.
  FX_BOOL m_bGetCharCodeOnly;
  // NOTE(review): presumably corresponds to IPDF_TextPage::NormalizeObjects()
  // -- confirm.
  FX_BOOL m_bNormalizeObjs;
  // NOTE(review): presumably controls whether hyphens are emitted in the
  // extracted text -- confirm.
  FX_BOOL m_bOutputHyphen;
};
77
// Abstract interface to the text of a page. Every operation is pure virtual;
// instances are obtained from the static factory functions, whose
// implementations are not in this header.
class IPDF_TextPage {
 public:
  // Factories returning a raw pointer to a new interface instance.
  // NOTE(review): ownership presumably passes to the caller (the destructor
  // is public and virtual) -- confirm at call sites. The meaning of |flags|
  // is not visible here.
  static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, int flags = 0);
  static IPDF_TextPage* CreateReflowTextPage(IPDF_ReflowedPage* pRefPage);

  // Virtual so that implementations can be destroyed through this interface.
  virtual ~IPDF_TextPage() {}

  virtual void NormalizeObjects(FX_BOOL bNormalize) = 0;

  // NOTE(review): presumably has to succeed before the query methods below
  // return meaningful data; IsParsed() reports the state -- confirm.
  virtual FX_BOOL ParseTextPage() = 0;

  virtual bool IsParsed() const = 0;

  // Conversions between two index spaces ("text" index and "char" index).
  // NOTE(review): how the two spaces differ, and what is returned for an
  // index with no counterpart, is not visible from this header.
  virtual int CharIndexFromTextIndex(int TextIndex) const = 0;

  virtual int TextIndexFromCharIndex(int CharIndex) const = 0;

  virtual int CountChars() const = 0;

  // Writes the record for character |index| through |info|.
  virtual void GetCharInfo(int index, FPDF_CHAR_INFO* info) const = 0;

  // Result is delivered through |rectArray| for the character range given by
  // |start| and |nCount|.
  virtual void GetRectArray(int start,
                            int nCount,
                            CFX_RectArray& rectArray) const = 0;

  // Two overloads of the same lookup: position as a point object or as
  // separate x/y coordinates, each with per-axis tolerances.
  // NOTE(review): the value returned when nothing is hit is not visible here.
  virtual int GetIndexAtPos(CPDF_Point point,
                            FX_FLOAT xTolerance,
                            FX_FLOAT yTolerance) const = 0;

  virtual int GetIndexAtPos(FX_FLOAT x,
                            FX_FLOAT y,
                            FX_FLOAT xTolerance,
                            FX_FLOAT yTolerance) const = 0;

  virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const = 0;

  virtual void GetRectsArrayByRect(const CFX_FloatRect& rect,
                                   CFX_RectArray& resRectArray) const = 0;

  // Non-const, unlike most queries in this interface.
  // NOTE(review): presumably computes and stores the rectangles that
  // GetRect() later indexes with |rectIndex| -- confirm.
  virtual int CountRects(int start, int nCount) = 0;

  // Returns the four edges of rectangle |rectIndex| through the reference
  // parameters.
  virtual void GetRect(int rectIndex,
                       FX_FLOAT& left,
                       FX_FLOAT& top,
                       FX_FLOAT& right,
                       FX_FLOAT& bottom) const = 0;

  // Both overloads report the rotation through |Rotate| and a success flag
  // as the return value; both are non-const.
  virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) = 0;

  virtual FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) = 0;

  // Non-const. NOTE(review): presumably prepares the segments that
  // GetBoundedSegment() later indexes -- confirm.
  // Default arguments on a virtual function are selected from the static
  // type of the call expression, so overrides should repeat the same default.
  virtual int CountBoundedSegments(FX_FLOAT left,
                                   FX_FLOAT top,
                                   FX_FLOAT right,
                                   FX_FLOAT bottom,
                                   FX_BOOL bContains = FALSE) = 0;

  // Returns segment |index| as a start/count pair through the references.
  virtual void GetBoundedSegment(int index, int& start, int& count) const = 0;

  // NOTE(review): |direction| is presumably one of FPDFTEXT_LEFT/RIGHT/UP/DOWN
  // -- confirm.
  virtual int GetWordBreak(int index, int direction) const = 0;

  // Defaults are start = 0 and nCount = -1. NOTE(review): -1 presumably means
  // "through the end of the page" -- confirm. As above, overrides should
  // repeat the same default values.
  virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const = 0;
};
141
// Search option bits. Each value is a distinct single bit, so options can be
// OR-ed together.
// NOTE(review): presumably the `flags` argument of
// IPDF_TextPageFind::FindFirst() -- confirm.
#define FPDFTEXT_MATCHCASE 0x00000001
#define FPDFTEXT_MATCHWHOLEWORD 0x00000002
#define FPDFTEXT_CONSECUTIVE 0x00000004
145class IPDF_TextPageFind {
146 public:
147  virtual ~IPDF_TextPageFind() {}
148
149  static IPDF_TextPageFind* CreatePageFind(const IPDF_TextPage* pTextPage);
150
151 public:
152  virtual FX_BOOL FindFirst(const CFX_WideString& findwhat,
153                            int flags,
154                            int startPos = 0) = 0;
155
156  virtual FX_BOOL FindNext() = 0;
157
158  virtual FX_BOOL FindPrev() = 0;
159
160  virtual void GetRectArray(CFX_RectArray& rects) const = 0;
161
162  virtual int GetCurOrder() const = 0;
163
164  virtual int GetMatchedCount() const = 0;
165};
166class IPDF_LinkExtract {
167 public:
168  virtual ~IPDF_LinkExtract() {}
169
170  static IPDF_LinkExtract* CreateLinkExtract();
171
172  virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) = 0;
173
174 public:
175  virtual int CountLinks() const = 0;
176
177  virtual CFX_WideString GetURL(int index) const = 0;
178
179  virtual void GetBoundedSegment(int index, int& start, int& count) const = 0;
180
181  virtual void GetRects(int index, CFX_RectArray& rects) const = 0;
182};
183
184#endif  // CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
185