1e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Copyright 2014 PDFium Authors. All rights reserved. 2e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Use of this source code is governed by a BSD-style license that can be 3e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// found in the LICENSE file. 4e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 5e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 7e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfapi/fpdf_pageobj.h" 8e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdftext/fpdf_text.h" 9e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfapi/fpdf_page.h" 10e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovclass CPDF_TextStream 11e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 12e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovpublic: 13e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjArray); 14e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov ~CPDF_TextStream() {} 15e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine); 16e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideTextBuf& m_Buffer; 17e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_BOOL m_bUseLF; 18e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_PtrArray* m_pObjArray; 19e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov const CPDF_TextObject* m_pLastObj; 20e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}; 21e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjArray) : m_Buffer(buffer) 22e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 23e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pLastObj = NULL; 24e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_bUseLF = bUseLF; 25e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray = pObjArray; 26e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 27e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_TextObject* pTextObj2) 28e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 29e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (!pTextObj1 || !pTextObj2) { 30e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 31e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 32e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_Right, pTextObj2->m_Top); 33e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_Right, pTextObj1->m_Top); 34e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { 35e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return TRUE; 36e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 37e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { 38e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov rcPreObj.Intersect(rcCurObj); 39e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (rcPreObj.IsEmpty()) { 40e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 41e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 42e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() / 2) { 43e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 44e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 45e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { 46e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 47e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 48e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 49e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int nPreCount = pTextObj2->CountItems(); 50e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int nCurCount = pTextObj1->CountItems(); 51e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (nPreCount != nCurCount) { 52e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 53e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 54e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < nPreCount; i++) { 55e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextObjectItem itemPer, itemCur; 56e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pTextObj2->GetItemInfo(i, &itemPer); 57e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pTextObj1->GetItemInfo(i, &itemCur); 58e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (itemCur.m_CharCode != itemPer.m_CharCode) { 59e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 60e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 61e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 62e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return TRUE; 63e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 64e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovint GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) 65e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 66e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(charCode == -1) { 67e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return 0; 68e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 69e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int w = pFont->GetCharWidthF(charCode); 70e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(w == 0) { 71e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_ByteString str; 72e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pFont->AppendChar(str, charCode); 73e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov w = pFont->GetStringWidth(str, 1); 74e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(w == 0) { 75e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_RECT BBox; 76e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pFont->GetCharBBox(charCode, BBox); 77e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov w = BBox.right - BBox.left; 78e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 79e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 80e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return w; 81e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 82e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovint FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, const CPDF_TextObject* pObj) 83e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 84e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(FPDFText_IsSameTextObject(pPrevObj, pObj)) { 85e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return -1; 86e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 87e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextObjectItem item; 88e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int nItem = pPrevObj->CountItems(); 89e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pPrevObj->GetItemInfo(nItem - 1, &item); 90e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_WCHAR preChar = 0, curChar = 0; 91e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString wstr = pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); 92e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(wstr.GetLength()) { 93e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov preChar = wstr.GetAt(0); 94e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 95e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT last_pos = item.m_OriginX; 96e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont()); 97e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000; 98e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov last_width = FXSYS_fabs(last_width); 99e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pObj->GetItemInfo(0, &item); 100e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); 101e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(wstr.GetLength()) { 102e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov curChar = wstr.GetAt(0); 103e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 104e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); 105e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; 106e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov this_width = FXSYS_fabs(this_width); 107e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width / 4; 108e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_AffineMatrix prev_matrix, prev_reverse; 109e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pPrevObj->GetTextMatrix(&prev_matrix); 110e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov prev_reverse.SetReverse(prev_matrix); 111e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY(); 112e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov prev_reverse.Transform(x, y); 113e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (FXSYS_fabs(y) > threshold * 2) { 114e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return 2; 115e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 116e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); 117e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 : threshold / 5) : (threshold / 2); 118e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) : FXSYS_fabs(pObj->GetFontSize()); 119e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold /= 1000; 120e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && preChar != L' ') 121e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(curChar != L' ' && preChar != L' ') { 122e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if((x - last_pos - last_width) > threshold || (last_pos - x - last_width) > threshold) { 123e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return 1; 124e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 125e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(x < 0 && (last_pos - x - last_width) > threshold) { 126e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return 1; 127e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 128e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if((x - last_pos - last_width) > this_width || (x - last_pos - this_width) > last_width ) { 129e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return 1; 130e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 131e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 132e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(last_pos + last_width > x + this_width && curChar == L' ') { 133e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return 3; 134e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 135e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return 0; 136e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 137e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine) 138e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 139e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_Font* pFont = pObj->GetFont(); 140e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_AffineMatrix matrix; 141e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pObj->GetTextMatrix(&matrix); 142e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int item_index = 0; 143e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_pLastObj) { 144e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); 145e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (result == 2) { 146e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int len = m_Buffer.GetLength(); 147e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { 148e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Buffer.Delete(len - 1, 1); 149e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_pObjArray) { 150e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->RemoveAt((len - 1) * 2, 2); 151e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 152e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 153e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (bFirstLine) { 154e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return TRUE; 155e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 156e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_bUseLF) { 157e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Buffer.AppendChar(L'\r'); 158e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Buffer.AppendChar(L'\n'); 159e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_pObjArray) { 160e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < 4; i ++) { 161e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add(NULL); 162e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 163e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 164e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 165e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Buffer.AppendChar(' '); 166e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_pObjArray) { 167e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add(NULL); 168e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add(NULL); 169e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 170e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 171e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 172e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (result == 1) { 173e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Buffer.AppendChar(L' '); 174e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_pObjArray) { 175e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add(NULL); 176e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add(NULL); 177e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 178e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (result == -1) { 179e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pLastObj = pObj; 180e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 181e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (result == 3) { 182e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov item_index = 1; 183e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 184e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 185e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pLastObj = pObj; 186e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int nItems = pObj->CountItems(); 187e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT Ignorekerning = 0; 188e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(int i = 1; i < nItems - 1; i += 2) { 189e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextObjectItem item; 190e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pObj->GetItemInfo(i, &item); 191e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (item.m_CharCode == (FX_DWORD) - 1) { 192e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(i == 1) { 193e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov Ignorekerning = item.m_OriginX; 194e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if(Ignorekerning > item.m_OriginX) { 195e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov Ignorekerning = item.m_OriginX; 196e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 197e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 198e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov Ignorekerning = 0; 199e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 200e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 201e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 202e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT spacing = 0; 203e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (; item_index < nItems; item_index ++) { 204e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextObjectItem item; 205e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pObj->GetItemInfo(item_index, &item); 206e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (item.m_CharCode == (FX_DWORD) - 1) { 207e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString wstr = m_Buffer.GetWideString(); 208e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') { 209e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 210e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 211e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); 212e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000; 213e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 214e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 215e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace; 216e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(nItems > 3 && !spacing) { 217e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov charSpace = 0; 218e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 219e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if((spacing || charSpace) && item_index > 0) { 220e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int last_width = 0; 221e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); 222e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); 223e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT threshold = 0; 224e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (space_charcode != -1) { 225e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000 ; 226e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 227e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(threshold > fontsize_h / 3) { 228e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold = 0; 229e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 230e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold /= 2; 231e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 232e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (threshold == 0) { 233e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold = fontsize_h; 234e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); 235e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width; 236e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int nDivide = 6; 237e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (threshold < 300) { 238e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov nDivide = 2; 239e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (threshold < 500) { 240e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov nDivide = 4; 241e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (threshold < 700) { 242e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov nDivide = 5; 243e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 244e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold = threshold / nDivide; 245e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov threshold = fontsize_h * threshold / 1000; 246e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 247e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(charSpace > 0.001) { 248e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov spacing += matrix.TransformDistance(charSpace); 249e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if(charSpace < -0.001) { 250e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); 251e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 252e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (threshold && (spacing && spacing >= threshold) ) { 253e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Buffer.AppendChar(L' '); 254e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_pObjArray) { 255e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add(NULL); 256e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add(NULL); 257e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 258e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 259e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (item.m_CharCode == (FX_DWORD) - 1) { 260e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 261e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 262e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov spacing = 0; 263e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 264e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode); 265e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (unicode_str.IsEmpty()) { 266e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode); 267e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_pObjArray) { 268e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add((void*)pObj); 269e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add((void*)(FX_INTPTR)item_index); 270e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 271e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 272e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Buffer << unicode_str; 273e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_pObjArray) { 274e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < unicode_str.GetLength(); i ++) { 275e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add((void*)pObj); 276e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_pObjArray->Add((void*)(FX_INTPTR)item_index); 277e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 278e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 279e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 280e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 281e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 282e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 283e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage, FX_BOOL bUseLF, 284e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_PtrArray* pObjArray) 285e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 286e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextStream textstream(buffer, bUseLF, pObjArray); 287e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_POSITION pos = pPage->GetFirstObjectPosition(); 288e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov while (pos) { 289e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_PageObject* pObject = pPage->GetNextObject(pos); 290e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pObject == NULL) { 291e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 292e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 293e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pObject->m_Type != PDFPAGE_TEXT) { 294e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 295e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 296e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE); 297e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 298e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 299e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage) 300e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 301e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideTextBuf buffer; 302e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov buffer.EstimateSize(0, 1024); 303e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_Page page; 304e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov page.Load(pDoc, pPage); 305e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_ParseOptions options; 306e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov options.m_bTextOnly = TRUE; 307e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov options.m_bSeparateForm = FALSE; 308e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov page.ParseContent(&options); 309e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextStream textstream(buffer, FALSE, NULL); 310e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_POSITION pos = page.GetFirstObjectPosition(); 311e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov while (pos) { 312e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_PageObject* pObject = page.GetNextObject(pos); 313e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pObject->m_Type != PDFPAGE_TEXT) { 314e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 315e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 316e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) { 317e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 318e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 319e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 320e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return buffer.GetWideString(); 321e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 322