// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
7e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfapi/fpdf_page.h"
8e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfapi/fpdf_pageobj.h"
9e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdftext/fpdf_text.h"
10e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "txtproc.h"
11e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "text_int.h"
12e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovextern FX_LPCSTR FCS_GetAltStr(FX_WCHAR);
13e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, int destcp, FX_LPCSTR defchar)
14e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
15e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (destcp == 0) {
16e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (unicode < 0x80) {
17e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return CFX_ByteString((char)unicode);
18e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
19e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_LPCSTR altstr = FCS_GetAltStr(unicode);
20e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (altstr) {
21e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return CFX_ByteString(altstr, -1);
22e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
23e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return CFX_ByteString(defchar, -1);
24e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
25e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_BOOL bDef = FALSE;
26e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    char buf[10];
27e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 10, NULL, &bDef);
28e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (ret && !bDef) {
29e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return CFX_ByteString(buf, ret);
30e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
31e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_LPCSTR altstr = FCS_GetAltStr(unicode);
32e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (altstr) {
33e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return CFX_ByteString(altstr, -1);
34e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
35e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return CFX_ByteString(defchar, -1);
36e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
37e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextPage::CTextPage()
38e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
39e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
40e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextPage::~CTextPage()
41e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
42e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int i;
43e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_BaseLines.GetSize(); i ++) {
44e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
45e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        delete pBaseLine;
46e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
47e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_TextColumns.GetSize(); i ++) {
48e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i);
49e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        delete pTextColumn;
50e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
51e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
52e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextPage::ProcessObject(CPDF_PageObject* pObject)
53e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
54e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (pObject->m_Type != PDFPAGE_TEXT) {
55e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
56e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
57e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_TextObject* pText = (CPDF_TextObject*)pObject;
58e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_Font* pFont = pText->m_TextState.GetFont();
59e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int count = pText->CountItems();
60e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2);
61e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->CalcCharPos(pPosArray);
62e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
63e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT fontsize_h = pText->m_TextState.GetFontSizeH();
64e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT fontsize_v = pText->m_TextState.GetFontSizeV();
65e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
66e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT spacew = 0;
67e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (space_charcode != -1) {
68e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        spacew = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000;
69e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
70e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (spacew == 0) {
71e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        spacew = fontsize_h / 4;
72e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
73e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (pText->m_TextState.GetBaselineAngle() != 0) {
74e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int cc = 0;
75e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CFX_AffineMatrix matrix;
76e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pText->GetTextMatrix(&matrix);
77e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (int i = 0; i < pText->m_nChars; i ++) {
78e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(FX_UINTPTR)pText->m_pCharCodes : pText->m_pCharCodes[i];
79e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (charcode == (FX_DWORD) - 1) {
80e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
81e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
82e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_RECT char_box;
83e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pFont->GetCharBBox(charcode, char_box);
84e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT char_left = pPosArray ? pPosArray[cc * 2] : char_box.left * pText->m_TextState.GetFontSize() / 1000;
85e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT char_right = pPosArray ? pPosArray[cc * 2 + 1] : char_box.right * pText->m_TextState.GetFontSize() / 1000;
86e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT char_top = char_box.top * pText->m_TextState.GetFontSize() / 1000;
87e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT char_bottom = char_box.bottom * pText->m_TextState.GetFontSize() / 1000;
88e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            cc ++;
89e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT char_origx, char_origy;
90e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            matrix.Transform(char_left, 0, char_origx, char_origy);
91e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            matrix.TransformRect(char_left, char_right, char_top, char_bottom);
92e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CFX_ByteString str;
93e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pFont->AppendChar(str, charcode);
94e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            InsertTextBox(NULL, char_origy, char_left, char_right, char_top,
95e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                          char_bottom, spacew, fontsize_v, str, pFont);
96e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
97e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pPosArray) {
98e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_Free(pPosArray);
99e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
100e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
101e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
102e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT ratio_h = fontsize_h / pText->m_TextState.GetFontSize();
103e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int ii = 0; ii < count * 2; ii ++) {
104e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pPosArray[ii] *= ratio_h;
105e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
106e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT baseline = pText->m_PosY;
107e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CTextBaseLine* pBaseLine = NULL;
108e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT topy = pText->m_Top;
109e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT bottomy = pText->m_Bottom;
110e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT leftx = pText->m_Left;
111e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int cc = 0;
112e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_ByteString segment;
113e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int space_count = 0;
114e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT last_left = 0, last_right = 0, segment_left = 0, segment_right = 0;
115e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < pText->m_nChars; i ++) {
116e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(FX_UINTPTR)pText->m_pCharCodes : pText->m_pCharCodes[i];
117e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (charcode == (FX_DWORD) - 1) {
118e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
119e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
120e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_FLOAT char_left = pPosArray[cc * 2];
121e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_FLOAT char_right = pPosArray[cc * 2 + 1];
122e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        cc ++;
123e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (char_left < last_left || (char_left - last_right) > spacew / 2) {
124e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right,
125e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                      topy, bottomy, spacew, fontsize_v, segment, pFont);
126e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            segment_left = char_left;
127e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            segment = "";
128e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
129e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (space_count > 1) {
130e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right,
131e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                      topy, bottomy, spacew, fontsize_v, segment, pFont);
132e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            segment = "";
133e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else if (space_count == 1) {
134e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pFont->AppendChar(segment, ' ');
135e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
136e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (segment.GetLength() == 0) {
137e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            segment_left = char_left;
138e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
139e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        segment_right = char_right;
140e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pFont->AppendChar(segment, charcode);
141e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        space_count = 0;
142e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        last_left = char_left;
143e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        last_right = char_right;
144e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
145e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (segment.GetLength())
146e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right,
147e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                  topy, bottomy, spacew, fontsize_v, segment, pFont);
148e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_Free(pPosArray);
149e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
150e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextBaseLine* CTextPage::InsertTextBox(CTextBaseLine* pBaseLine, FX_FLOAT basey, FX_FLOAT leftx,
151e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                        FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT bottomy, FX_FLOAT spacew, FX_FLOAT fontsize_v,
152e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                        CFX_ByteString& str, CPDF_Font* pFont)
153e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
154e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (str.GetLength() == 0) {
155e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return NULL;
156e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
157e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (pBaseLine == NULL) {
158e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int i;
159e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (i = 0; i < m_BaseLines.GetSize(); i ++) {
160e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
161e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (pExistLine->m_BaseLine == basey) {
162e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pBaseLine = pExistLine;
163e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                break;
164e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
165e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (pExistLine->m_BaseLine < basey) {
166e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                break;
167e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
168e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
169e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pBaseLine == NULL) {
170e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pBaseLine = new CTextBaseLine;
171e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pBaseLine->m_BaseLine = basey;
172e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            m_BaseLines.InsertAt(i, pBaseLine);
173e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
174e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
175e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_WideString text;
176e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_LPCSTR pStr = str;
177e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int len = str.GetLength(), offset = 0;
178e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    while (offset < len) {
179e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_DWORD ch = pFont->GetNextChar(pStr, len, offset);
180e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch);
181e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (unicode_str.IsEmpty()) {
182e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            text += (FX_WCHAR)ch;
183e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
184e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        else {
185e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            text += unicode_str;
186e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
187e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
188e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pBaseLine->InsertTextBox(leftx, rightx, topy, bottomy, spacew, fontsize_v, text);
189e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return pBaseLine;
190e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
191e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth)
192e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
193e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT lastheight = -1;
194e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT lastbaseline = -1;
195e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT MinLeftX = 1000000;
196e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT MaxRightX = 0;
197e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int i;
198e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_BaseLines.GetSize(); i ++) {
199e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
200e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_FLOAT leftx, rightx;
201e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pBaseLine->GetWidth(leftx, rightx)) {
202e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (leftx < MinLeftX) {
203e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                MinLeftX = leftx;
204e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
205e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (rightx > MaxRightX) {
206e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                MaxRightX = rightx;
207e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
208e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
209e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
210e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_BaseLines.GetSize(); i ++) {
211e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
212e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pBaseLine->MergeBoxes();
213e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
214e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 1; i < m_BaseLines.GetSize(); i ++) {
215e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
216e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1);
217e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pBaseLine->CanMerge(pPrevLine)) {
218e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pPrevLine->Merge(pBaseLine);
219e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            delete pBaseLine;
220e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            m_BaseLines.RemoveAt(i);
221e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            i --;
222e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
223e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
224e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (m_bAutoWidth) {
225e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int* widths = FX_Alloc(int, m_BaseLines.GetSize());
226e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (i = 0; i < m_BaseLines.GetSize(); i ++) {
227e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            widths[i] = 0;
228e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
229e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int TotalChars = 0;
230e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT TotalWidth = 0;
231e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int minchars;
232e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pBaseLine->CountChars(TotalChars, TotalWidth, minchars);
233e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (TotalChars) {
234e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                FX_FLOAT charwidth = TotalWidth / TotalChars;
235e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                widths[i] = (int)((MaxRightX - MinLeftX) / charwidth);
236e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
237e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (widths[i] > 1000) {
238e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                widths[i] = 1000;
239e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
240e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (widths[i] < minchars) {
241e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                widths[i] = minchars;
242e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
243e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
244e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int AvgWidth = 0, widthcount = 0;
245e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (i = 0; i < m_BaseLines.GetSize(); i ++)
246e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (widths[i]) {
247e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                AvgWidth += widths[i];
248e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                widthcount ++;
249e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
250e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5);
251e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int MaxWidth = 0;
252e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (i = 0; i < m_BaseLines.GetSize(); i ++)
253e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (MaxWidth < widths[i]) {
254e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                MaxWidth = widths[i];
255e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
256e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (MaxWidth > AvgWidth * 6 / 5) {
257e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            MaxWidth = AvgWidth * 6 / 5;
258e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
259e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_Free(widths);
260e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (iMinWidth < MaxWidth) {
261e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            iMinWidth = MaxWidth;
262e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
263e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
264e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_BaseLines.GetSize(); i ++) {
265e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
266e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pBaseLine->MergeBoxes();
267e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
268e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (m_bKeepColumn) {
269e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FindColumns();
270e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
271e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_BaseLines.GetSize(); i ++) {
272e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
273e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (lastheight >= 0) {
274e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine;
275e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) {
276e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                lines.Add(L"");
277e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
278e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
279e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        lastheight = pBaseLine->m_MaxFontSizeV;
280e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        lastbaseline = pBaseLine->m_BaseLine;
281e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CFX_WideString str;
282e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth);
283e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        lines.Add(str);
284e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
285e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
286e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest)
287e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
288e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
289e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_LPWSTR pDst = NULL;
290e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
291e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (nCount < 1 ) {
292e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        sDest += wChar;
293e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
294e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
295e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pDst = new FX_WCHAR[nCount];
296e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_Unicode_GetNormalization(wChar, pDst);
297e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int nIndex = 0; nIndex < nCount; nIndex++) {
298e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        sDest += pDst[nIndex];
299e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
300e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    delete[] pDst;
301e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
302e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid NormalizeString(CFX_WideString& str)
303e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
304e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (str.GetLength() <= 0) {
305e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
306e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
307e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_WideString sBuffer;
308e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    IFX_BidiChar* BidiChar = IFX_BidiChar::Create();
309e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (NULL == BidiChar)	{
310e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
311e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
312e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_WordArray order;
313e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_BOOL bR2L = FALSE;
314e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_INT32 start = 0, count = 0, i = 0;
315e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int nR2L = 0, nL2R = 0;
316e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < str.GetLength(); i++) {
317e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if(BidiChar->AppendChar(str.GetAt(i))) {
318e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
319e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            order.Add(start);
320e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            order.Add(count);
321e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            order.Add(ret);
322e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if(!bR2L) {
323e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if(ret == 2) {
324e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    nR2L++;
325e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                } else if (ret == 1) {
326e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    nL2R++;
327e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
328e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
329e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
330e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
331e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(BidiChar->EndChar()) {
332e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
333e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        order.Add(start);
334e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        order.Add(count);
335e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        order.Add(ret);
336e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if(!bR2L) {
337e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if(ret == 2) {
338e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                nR2L++;
339e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else if(ret == 1) {
340e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                nL2R++;
341e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
342e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
343e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
344e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(nR2L > 0 && nR2L >= nL2R) {
345e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        bR2L = TRUE;
346e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
347e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(bR2L) {
348e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int count = order.GetSize();
349e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for(int j = count - 1; j > 0; j -= 3) {
350e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int ret = order.GetAt(j);
351e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int start = order.GetAt(j - 2);
352e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int count1 = order.GetAt(j - 1);
353e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if(ret == 2 || ret == 0) {
354e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                for(int i = start + count1 - 1; i >= start; i--) {
355e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    NormalizeCompositeChar(str[i], sBuffer);
356e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
357e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else {
358e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                i = j;
359e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                FX_BOOL bSymbol = FALSE;
360e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                while(i > 0 && order.GetAt(i) != 2) {
361e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    bSymbol = !order.GetAt(i);
362e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    i -= 3;
363e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
364e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                int end = start + count1 ;
365e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                int n = 0;
366e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if(bSymbol) {
367e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    n = i + 6;
368e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                } else {
369e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    n = i + 3;
370e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
371e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if(n >= j) {
372e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    for(int m = start; m < end; m++) {
373e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        sBuffer += str[m];
374e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    }
375e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                } else {
376e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    i = j;
377e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    j = n;
378e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    for(; n <= i; n += 3) {
379e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        int start = order.GetAt(n - 2);
380e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        int count1 = order.GetAt(n - 1);
381e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        int end = start + count1 ;
382e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        for(int m = start; m < end; m++) {
383e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                            sBuffer += str[m];
384e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        }
385e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    }
386e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
387e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
388e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
389e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    } else {
390e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int count = order.GetSize();
391e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_BOOL bL2R = FALSE;
392e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for(int j = 0; j < count; j += 3) {
393e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int ret = order.GetAt(j + 2);
394e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int start = order.GetAt(j);
395e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int count1 = order.GetAt(j + 1);
396e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if(ret == 2 || (j == 0 && ret == 0 && !bL2R)) {
397e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                int i = j + 3;
398e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                while(bR2L && i < count) {
399e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    if(order.GetAt(i + 2) == 1) {
400e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        break;
401e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    } else {
402e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        i += 3;
403e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    }
404e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
405e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if(i == 3) {
406e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    j = -3;
407e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    bL2R = TRUE;
408e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    continue;
409e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
410e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                int end = str.GetLength() - 1;
411e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if(i < count) {
412e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    end = order.GetAt(i) - 1;
413e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
414e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                j = i - 3;
415e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                for(int n = end; n >= start; n--) {
416e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    NormalizeCompositeChar(str[i], sBuffer);
417e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
418e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else {
419e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                int end = start + count1 ;
420e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                for(int i = start; i < end; i++) {
421e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    sBuffer += str[i];
422e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
423e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
424e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
425e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
426e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    str.Empty();
427e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    str += sBuffer;
428e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    BidiChar->Release();
429e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
430e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovstatic FX_BOOL IsNumber(CFX_WideString& str)
431e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
432e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < str.GetLength(); i ++) {
433e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_WCHAR ch = str[i];
434e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && ch != ' ') {
435e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return FALSE;
436e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
437e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
438e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return TRUE;
439e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
440e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextPage::FindColumns()
441e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
442e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int i;
443e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_BaseLines.GetSize(); i ++) {
444e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
445e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) {
446e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j);
447e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CTextColumn* pColumn = FindColumn(pTextBox->m_Right);
448e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (pColumn == NULL) {
449e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pColumn = new CTextColumn;
450e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pColumn->m_Count = 1;
451e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pColumn->m_AvgPos = pTextBox->m_Right;
452e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pColumn->m_TextPos = -1;
453e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                m_TextColumns.Add(pColumn);
454e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else {
455e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pColumn->m_AvgPos = (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) /
456e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                    (pColumn->m_Count + 1);
457e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pColumn->m_Count ++;
458e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
459e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
460e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
461e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int mincount = m_BaseLines.GetSize() / 4;
462e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_TextColumns.GetSize(); i ++) {
463e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i);
464e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pTextColumn->m_Count >= mincount) {
465e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
466e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
467e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        delete pTextColumn;
468e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        m_TextColumns.RemoveAt(i);
469e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        i --;
470e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
471e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_BaseLines.GetSize(); i ++) {
472e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
473e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) {
474e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j);
475e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (IsNumber(pTextBox->m_Text)) {
476e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pTextBox->m_pColumn = FindColumn(pTextBox->m_Right);
477e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
478e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
479e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
480e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
481e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextColumn* CTextPage::FindColumn(FX_FLOAT xpos)
482e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
483e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < m_TextColumns.GetSize(); i ++) {
484e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i);
485e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) {
486e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return pColumn;
487e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
488e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
489e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return NULL;
490e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
491e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextPage::BreakSpace(CPDF_TextObject* pTextObj)
492e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
493e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
494e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextBaseLine::CTextBaseLine()
495e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
496e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_Top = -100000;
497e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_Bottom = 100000;
498e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_MaxFontSizeV = 0;
499e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
500e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextBaseLine::~CTextBaseLine()
501e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
502e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < m_TextList.GetSize(); i ++) {
503e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
504e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        delete pText;
505e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
506e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
507e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::InsertTextBox(FX_FLOAT leftx, FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT bottomy,
508e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                  FX_FLOAT spacew, FX_FLOAT fontsize_v, const CFX_WideString& text)
509e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
510e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (m_Top < topy) {
511e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        m_Top = topy;
512e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
513e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (m_Bottom > bottomy) {
514e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        m_Bottom = bottomy;
515e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
516e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (m_MaxFontSizeV < fontsize_v) {
517e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        m_MaxFontSizeV = fontsize_v;
518e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
519e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int i;
520e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_TextList.GetSize(); i ++) {
521e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
522e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pText->m_Left > leftx) {
523e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            break;
524e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
525e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
526e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CTextBox* pText = new CTextBox;
527e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->m_Text = text;
528e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->m_Left = leftx;
529e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->m_Right = rightx;
530e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->m_Top = topy;
531e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->m_Bottom = bottomy;
532e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->m_SpaceWidth = spacew;
533e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->m_FontSizeV = fontsize_v;
534e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText->m_pColumn = NULL;
535e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_TextList.InsertAt(i, pText);
536e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
537e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL GetIntersection(FX_FLOAT low1, FX_FLOAT high1, FX_FLOAT low2, FX_FLOAT high2,
538e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        FX_FLOAT& interlow, FX_FLOAT& interhigh);
539e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther)
540e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
541e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT inter_top, inter_bottom;
542e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top,
543e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                         inter_bottom, inter_top)) {
544e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return FALSE;
545e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
546e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT inter_h = inter_top - inter_bottom;
547e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (inter_h < (m_Top - m_Bottom) / 2 && inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) {
548e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return FALSE;
549e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
550e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine);
551e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < m_TextList.GetSize(); i ++) {
552e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
553e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (int j = 0; j < pOther->m_TextList.GetSize(); j ++) {
554e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j);
555e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT inter_left, inter_right;
556e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!GetIntersection(pText->m_Left, pText->m_Right,
557e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                 pOtherText->m_Left, pOtherText->m_Right, inter_left, inter_right)) {
558e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
559e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
560e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT inter_w = inter_right - inter_left;
561e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (inter_w < pText->m_SpaceWidth / 2 && inter_w < pOtherText->m_SpaceWidth / 2) {
562e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
563e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
564e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (dy >= (pText->m_Bottom - pText->m_Top) / 2 ||
565e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) {
566e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                return FALSE;
567e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
568e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
569e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
570e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return TRUE;
571e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
572e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::Merge(CTextBaseLine* pOther)
573e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
574e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < pOther->m_TextList.GetSize(); i ++) {
575e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i);
576e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom,
577e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                      pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text);
578e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
579e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
580e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx)
581e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
582e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int i;
583e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = 0; i < m_TextList.GetSize(); i ++) {
584e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
585e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pText->m_Text != L" ") {
586e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            break;
587e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
588e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
589e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (i == m_TextList.GetSize()) {
590e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return FALSE;
591e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
592e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
593e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    leftx = pText->m_Left;
594e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (i = m_TextList.GetSize() - 1; i >= 0; i --) {
595e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
596e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pText->m_Text != L" ") {
597e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            break;
598e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
599e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
600e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pText = (CTextBox*)m_TextList.GetAt(i);
601e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    rightx = pText->m_Right;
602e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return TRUE;
603e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
604e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::MergeBoxes()
605e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
606e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int i = 0;
607e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    while (1) {
608e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (i >= m_TextList.GetSize() - 1) {
609e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            break;
610e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
611e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i);
612e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1);
613e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right;
614e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) ?
615e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                          pNextText->m_SpaceWidth : pThisText->m_SpaceWidth;
616e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (spacew > 0.0 && dx < spacew * 2) {
617e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pThisText->m_Right = pNextText->m_Right;
618e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (dx > spacew * 1.5) {
619e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pThisText->m_Text += L"  ";
620e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else if (dx > spacew / 3) {
621e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                pThisText->m_Text += L' ';
622e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
623e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pThisText->m_Text += pNextText->m_Text;
624e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pThisText->m_SpaceWidth = pNextText->m_SpaceWidth == 0.0 ?
625e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                      spacew : pNextText->m_SpaceWidth;
626e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            m_TextList.RemoveAt(i + 1);
627e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            delete pNextText;
628e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else {
629e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            i ++;
630e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
631e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
632e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
633e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::WriteOutput(CFX_WideString& str, FX_FLOAT leftx, FX_FLOAT pagewidth,
634e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                int iTextWidth)
635e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
636e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int lastpos = -1;
637e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < m_TextList.GetSize(); i ++) {
638e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
639e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int xpos;
640e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pText->m_pColumn) {
641e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            xpos = (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + 0.5);
642e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            xpos -= pText->m_Text.GetLength();
643e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else {
644e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5);
645e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
646e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (xpos <= lastpos) {
647e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            xpos = lastpos + 1;
648e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
649e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (int j = lastpos + 1; j < xpos; j ++) {
650e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            str += ' ';
651e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
652e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CFX_WideString sSrc(pText->m_Text);
653e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        NormalizeString(sSrc);
654e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        str += sSrc;
655e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        str += ' ';
656e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        lastpos = xpos + pText->m_Text.GetLength();
657e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
658e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
659e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars)
660e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
661e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    minchars = 0;
662e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < m_TextList.GetSize(); i ++) {
663e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
664e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pText->m_Right - pText->m_Left < 0.002) {
665e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
666e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
667e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        count += pText->m_Text.GetLength();
668e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        width += pText->m_Right - pText->m_Left;
669e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        minchars += pText->m_Text.GetLength() + 1;
670e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
671e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
672e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#define PI 3.1415926535897932384626433832795
673e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovstatic void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox)
674e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
675e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int total_count = 0, rotated_count[3] = {0, 0, 0};
676e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_POSITION pos = page.GetFirstObjectPosition();
677e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    while (pos) {
678e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CPDF_PageObject* pObj = page.GetNextObject(pos);
679e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pObj->m_Type != PDFPAGE_TEXT) {
680e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
681e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
682e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        total_count ++;
683e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CPDF_TextObject* pText = (CPDF_TextObject*)pObj;
684e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_FLOAT angle = pText->m_TextState.GetBaselineAngle();
685e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (angle == 0.0) {
686e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
687e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
688e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int degree = (int)(angle * 180 / PI + 0.5);
689e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (degree % 90) {
690e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
691e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
692e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (degree < 0) {
693e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            degree += 360;
694e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
695e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int index = degree / 90 % 3 - 1;
696e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (index < 0) {
697e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
698e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
699e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        rotated_count[index] ++;
700e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
701e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (total_count == 0) {
702e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
703e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
704e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_AffineMatrix matrix;
705e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (rotated_count[0] > total_count * 2 / 3) {
706e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        matrix.Set(0, -1, 1, 0, 0, page.GetPageHeight());
707e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    } else if (rotated_count[1] > total_count * 2 / 3) {
708e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        matrix.Set(-1, 0, 0, -1, page.GetPageWidth(), page.GetPageHeight());
709e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    } else if (rotated_count[2] > total_count * 2 / 3) {
710e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        matrix.Set(0, 1, -1, 0, page.GetPageWidth(), 0);
711e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    } else {
712e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
713e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
714e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    page.Transform(matrix);
715e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    page_bbox.Transform(&matrix);
716e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
717e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
718e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                             int iMinWidth, FX_DWORD flags)
719e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
720e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    lines.RemoveAll();
721e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (pPage == NULL) {
722e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return;
723e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
724e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_Page page;
725e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    page.Load(pDoc, pPage);
726e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_ParseOptions options;
727e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    options.m_bTextOnly = TRUE;
728e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    options.m_bSeparateForm = FALSE;
729e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    page.ParseContent(&options);
730e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_FloatRect page_bbox = page.GetPageBBox();
731e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (flags & PDF2TXT_AUTO_ROTATE) {
732e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CheckRotate(page, page_bbox);
733e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
734e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CTextPage texts;
735e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH;
736e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN;
737e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    texts.m_bBreakSpace = TRUE;
738e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_POSITION pos = page.GetFirstObjectPosition();
739e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    while (pos) {
740e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CPDF_PageObject* pObject = page.GetNextObject(pos);
741e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) {
742e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right, pObject->m_Top);
743e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (!page_bbox.Contains(rect)) {
744e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
745e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
746e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
747e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        texts.ProcessObject(pObject);
748e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
749e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    texts.WriteOutput(lines, iMinWidth);
750e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
751e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
752e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                     int iMinWidth, FX_DWORD flags)
753e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
754e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    lines.RemoveAll();
755e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_WideStringArray wlines;
756e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags);
757e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < wlines.GetSize(); i ++) {
758e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CFX_WideString wstr = wlines[i];
759e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CFX_ByteString str;
760e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        for (int c = 0; c < wstr.GetLength(); c ++) {
761e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?");
762e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
763e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        lines.Add(str);
764e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
765e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
766e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovextern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage, FX_BOOL bUseLF,
767e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                       CFX_PtrArray* pObjArray);
768e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage, FX_DWORD flags)
769e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
770e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    buffer.EstimateSize(0, 10240);
771e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_Page page;
772e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    page.Load(pDoc, pPage);
773e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_ParseOptions options;
774e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    options.m_bTextOnly = TRUE;
775e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    options.m_bSeparateForm = FALSE;
776e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    page.ParseContent(&options);
777e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL);
778e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
779