1e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Copyright 2014 PDFium Authors. All rights reserved.
2e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Use of this source code is governed by a BSD-style license that can be
3e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// found in the LICENSE file.
4e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
5e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
7e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfapi/fpdf_pageobj.h"
8e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdftext/fpdf_text.h"
9e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfapi/fpdf_page.h"
10e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovclass CPDF_TextStream
11e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
12e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovpublic:
13e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjArray);
14e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    ~CPDF_TextStream() {}
15e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine);
16e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_WideTextBuf&	m_Buffer;
17e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_BOOL				m_bUseLF;
18e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_PtrArray*		m_pObjArray;
19e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    const CPDF_TextObject*	m_pLastObj;
20e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov};
21e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjArray) : m_Buffer(buffer)
22e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
23e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_pLastObj = NULL;
24e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_bUseLF = bUseLF;
25e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_pObjArray = pObjArray;
26e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
27e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_TextObject* pTextObj2)
28e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
29e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!pTextObj1 || !pTextObj2) {
30e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return FALSE;
31e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
32e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_Right, pTextObj2->m_Top);
33e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_Right, pTextObj1->m_Top);
34e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) {
35e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return TRUE;
36e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
37e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) {
38e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        rcPreObj.Intersect(rcCurObj);
39e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (rcPreObj.IsEmpty()) {
40e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return FALSE;
41e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
42e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() / 2) {
43e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return FALSE;
44e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
45e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) {
46e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return FALSE;
47e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
48e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
49e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int nPreCount = pTextObj2->CountItems();
50e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int nCurCount = pTextObj1->CountItems();
51e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (nPreCount != nCurCount) {
52e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return FALSE;
53e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
54e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (int i = 0; i < nPreCount; i++) {
55e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CPDF_TextObjectItem itemPer, itemCur;
56e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pTextObj2->GetItemInfo(i, &itemPer);
57e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pTextObj1->GetItemInfo(i, &itemCur);
58e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (itemCur.m_CharCode != itemPer.m_CharCode) {
59e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return FALSE;
60e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
61e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
62e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return TRUE;
63e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
64e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovint GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont)
65e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
66e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(charCode == -1) {
67e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return 0;
68e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
69e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int w = pFont->GetCharWidthF(charCode);
70e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(w == 0) {
71e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CFX_ByteString str;
72e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pFont->AppendChar(str, charCode);
73e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        w = pFont->GetStringWidth(str, 1);
74e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if(w == 0) {
75e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_RECT BBox;
76e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            pFont->GetCharBBox(charCode, BBox);
77e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            w = BBox.right - BBox.left;
78e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
79e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
80e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return w;
81e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
82e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovint FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, const CPDF_TextObject* pObj)
83e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
84e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(FPDFText_IsSameTextObject(pPrevObj, pObj)) {
85e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return -1;
86e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
87e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_TextObjectItem item;
88e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int nItem = pPrevObj->CountItems();
89e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pPrevObj->GetItemInfo(nItem - 1, &item);
90e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_WCHAR preChar = 0, curChar = 0;
91e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_WideString wstr = pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
92e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(wstr.GetLength()) {
93e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        preChar = wstr.GetAt(0);
94e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
95e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT last_pos = item.m_OriginX;
96e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont());
97e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000;
98e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    last_width = FXSYS_fabs(last_width);
99e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pObj->GetItemInfo(0, &item);
100e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
101e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(wstr.GetLength()) {
102e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        curChar = wstr.GetAt(0);
103e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
104e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont());
105e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000;
106e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    this_width = FXSYS_fabs(this_width);
107e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width / 4;
108e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_AffineMatrix prev_matrix, prev_reverse;
109e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pPrevObj->GetTextMatrix(&prev_matrix);
110e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    prev_reverse.SetReverse(prev_matrix);
111e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY();
112e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    prev_reverse.Transform(x, y);
113e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (FXSYS_fabs(y) > threshold * 2) {
114e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return 2;
115e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
116e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth);
117e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 :  threshold / 5) : (threshold / 2);
118e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) : FXSYS_fabs(pObj->GetFontSize());
119e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    threshold /= 1000;
120e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && preChar != L' ')
121e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if(curChar != L' ' && preChar != L' ') {
122e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if((x - last_pos - last_width) > threshold || (last_pos - x - last_width) > threshold) {
123e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                return 1;
124e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
125e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if(x < 0 && (last_pos - x - last_width) > threshold) {
126e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                return 1;
127e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
128e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if((x - last_pos - last_width) > this_width || (x - last_pos - this_width) > last_width ) {
129e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                return 1;
130e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
131e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
132e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if(last_pos + last_width > x + this_width && curChar == L' ') {
133e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        return 3;
134e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
135e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return 0;
136e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
137e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine)
138e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
139e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_Font* pFont = pObj->GetFont();
140e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_AffineMatrix matrix;
141e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    pObj->GetTextMatrix(&matrix);
142e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int item_index = 0;
143e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (m_pLastObj) {
144e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        int result = FPDFText_ProcessInterObj(m_pLastObj, pObj);
145e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (result == 2) {
146e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int len = m_Buffer.GetLength();
147e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') {
148e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                m_Buffer.Delete(len - 1, 1);
149e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if (m_pObjArray) {
150e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    m_pObjArray->RemoveAt((len - 1) * 2, 2);
151e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
152e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else {
153e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if (bFirstLine) {
154e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    return TRUE;
155e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
156e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if (m_bUseLF) {
157e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    m_Buffer.AppendChar(L'\r');
158e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    m_Buffer.AppendChar(L'\n');
159e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    if (m_pObjArray) {
160e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        for (int i = 0; i < 4; i ++) {
161e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                            m_pObjArray->Add(NULL);
162e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        }
163e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    }
164e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                } else {
165e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    m_Buffer.AppendChar(' ');
166e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    if (m_pObjArray) {
167e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        m_pObjArray->Add(NULL);
168e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                        m_pObjArray->Add(NULL);
169e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    }
170e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
171e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
172e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else if (result == 1) {
173e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            m_Buffer.AppendChar(L' ');
174e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (m_pObjArray) {
175e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                m_pObjArray->Add(NULL);
176e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                m_pObjArray->Add(NULL);
177e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
178e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else if (result == -1) {
179e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            m_pLastObj = pObj;
180e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            return FALSE;
181e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else if (result == 3) {
182e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            item_index = 1;
183e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
184e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
185e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    m_pLastObj = pObj;
186e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    int nItems = pObj->CountItems();
187e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT Ignorekerning = 0;
188e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for(int i = 1; i < nItems - 1; i += 2) {
189e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CPDF_TextObjectItem item;
190e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pObj->GetItemInfo(i, &item);
191e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (item.m_CharCode == (FX_DWORD) - 1) {
192e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if(i == 1) {
193e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                Ignorekerning = item.m_OriginX;
194e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else if(Ignorekerning > item.m_OriginX) {
195e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                Ignorekerning = item.m_OriginX;
196e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
197e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else {
198e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            Ignorekerning = 0;
199e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            break;
200e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
201e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
202e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_FLOAT spacing = 0;
203e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    for (; item_index < nItems; item_index ++) {
204e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CPDF_TextObjectItem item;
205e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        pObj->GetItemInfo(item_index, &item);
206e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (item.m_CharCode == (FX_DWORD) - 1) {
207e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            CFX_WideString wstr = m_Buffer.GetWideString();
208e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') {
209e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
210e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
211e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH();
212e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000;
213e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
214e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
215e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace;
216e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if(nItems > 3 && !spacing) {
217e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            charSpace = 0;
218e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
219e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if((spacing || charSpace) && item_index > 0) {
220e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            int last_width = 0;
221e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH();
222e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
223e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            FX_FLOAT threshold = 0;
224e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (space_charcode != -1) {
225e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000 ;
226e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
227e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if(threshold > fontsize_h / 3) {
228e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                threshold = 0;
229e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else {
230e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                threshold /= 2;
231e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
232e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (threshold == 0) {
233e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                threshold = fontsize_h;
234e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
235e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width;
236e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                int nDivide = 6;
237e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if (threshold < 300) {
238e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    nDivide = 2;
239e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                } else if (threshold < 500) {
240e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    nDivide = 4;
241e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                } else if (threshold < 700) {
242e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    nDivide = 5;
243e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
244e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                threshold = threshold / nDivide;
245e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                threshold = fontsize_h * threshold / 1000;
246e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
247e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if(charSpace > 0.001) {
248e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                spacing += matrix.TransformDistance(charSpace);
249e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            } else if(charSpace < -0.001) {
250e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace));
251e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
252e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (threshold && (spacing && spacing >= threshold) ) {
253e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                m_Buffer.AppendChar(L' ');
254e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                if (m_pObjArray) {
255e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    m_pObjArray->Add(NULL);
256e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    m_pObjArray->Add(NULL);
257e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
258e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
259e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (item.m_CharCode == (FX_DWORD) - 1) {
260e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                continue;
261e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
262e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            spacing = 0;
263e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
264e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode);
265e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (unicode_str.IsEmpty()) {
266e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode);
267e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (m_pObjArray) {
268e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                m_pObjArray->Add((void*)pObj);
269e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                m_pObjArray->Add((void*)(FX_INTPTR)item_index);
270e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
271e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        } else {
272e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            m_Buffer << unicode_str;
273e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            if (m_pObjArray) {
274e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                for (int i = 0; i < unicode_str.GetLength(); i ++) {
275e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    m_pObjArray->Add((void*)pObj);
276e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                    m_pObjArray->Add((void*)(FX_INTPTR)item_index);
277e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                }
278e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            }
279e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
280e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
281e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return FALSE;
282e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
283e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage, FX_BOOL bUseLF,
284e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov                                CFX_PtrArray* pObjArray)
285e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
286e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_TextStream textstream(buffer, bUseLF, pObjArray);
287e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_POSITION pos = pPage->GetFirstObjectPosition();
288e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    while (pos) {
289e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CPDF_PageObject* pObject = pPage->GetNextObject(pos);
290e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pObject == NULL) {
291e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
292e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
293e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pObject->m_Type != PDFPAGE_TEXT) {
294e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
295e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
296e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE);
297e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
298e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
299e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage)
300e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{
301e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CFX_WideTextBuf buffer;
302e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    buffer.EstimateSize(0, 1024);
303e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_Page page;
304e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    page.Load(pDoc, pPage);
305e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_ParseOptions options;
306e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    options.m_bTextOnly = TRUE;
307e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    options.m_bSeparateForm = FALSE;
308e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    page.ParseContent(&options);
309e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    CPDF_TextStream textstream(buffer, FALSE, NULL);
310e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    FX_POSITION pos = page.GetFirstObjectPosition();
311e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    while (pos) {
312e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        CPDF_PageObject* pObject = page.GetNextObject(pos);
313e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (pObject->m_Type != PDFPAGE_TEXT) {
314e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            continue;
315e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
316e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) {
317e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov            break;
318e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov        }
319e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
320e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    return buffer.GetWideString();
321e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
322