1e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Copyright 2014 PDFium Authors. All rights reserved. 2e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Use of this source code is governed by a BSD-style license that can be 3e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// found in the LICENSE file. 4e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 5e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 7e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfapi/fpdf_page.h" 8e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdfapi/fpdf_pageobj.h" 9e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "../../include/fpdftext/fpdf_text.h" 10e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "txtproc.h" 11e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#include "text_int.h" 12e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovextern FX_LPCSTR FCS_GetAltStr(FX_WCHAR); 13e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, int destcp, FX_LPCSTR defchar) 14e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 15e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (destcp == 0) { 16e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (unicode < 0x80) { 17e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return CFX_ByteString((char)unicode); 18e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 19e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_LPCSTR altstr = FCS_GetAltStr(unicode); 20e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (altstr) { 21e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return CFX_ByteString(altstr, -1); 22e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 23e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return CFX_ByteString(defchar, -1); 24e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 25e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_BOOL bDef = FALSE; 26e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov char buf[10]; 27e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 10, NULL, &bDef); 28e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (ret && !bDef) { 29e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return CFX_ByteString(buf, ret); 30e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 31e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_LPCSTR altstr = FCS_GetAltStr(unicode); 32e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (altstr) { 33e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return CFX_ByteString(altstr, -1); 34e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 35e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return CFX_ByteString(defchar, -1); 36e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 37e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextPage::CTextPage() 38e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 39e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 40e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextPage::~CTextPage() 41e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 42e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int i; 43e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 44e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 45e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov delete pBaseLine; 46e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 47e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_TextColumns.GetSize(); i ++) { 48e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); 49e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov delete pTextColumn; 50e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 51e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 52e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextPage::ProcessObject(CPDF_PageObject* pObject) 53e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 54e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pObject->m_Type != PDFPAGE_TEXT) { 55e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return; 56e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 57e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextObject* pText = (CPDF_TextObject*)pObject; 58e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_Font* pFont = pText->m_TextState.GetFont(); 59e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int count = pText->CountItems(); 60e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); 61e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->CalcCharPos(pPosArray); 62e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 63e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT fontsize_h = pText->m_TextState.GetFontSizeH(); 64e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT fontsize_v = pText->m_TextState.GetFontSizeV(); 65e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); 66e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT spacew = 0; 67e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (space_charcode != -1) { 68e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov spacew = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000; 69e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 70e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (spacew == 0) { 71e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov spacew = fontsize_h / 4; 72e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 73e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pText->m_TextState.GetBaselineAngle() != 0) { 74e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int cc = 0; 75e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_AffineMatrix matrix; 76e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->GetTextMatrix(&matrix); 77e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < pText->m_nChars; i ++) { 78e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(FX_UINTPTR)pText->m_pCharCodes : pText->m_pCharCodes[i]; 79e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (charcode == (FX_DWORD) - 1) { 80e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 81e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 82e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_RECT char_box; 83e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pFont->GetCharBBox(charcode, char_box); 84e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT char_left = pPosArray ? pPosArray[cc * 2] : char_box.left * pText->m_TextState.GetFontSize() / 1000; 85e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT char_right = pPosArray ? pPosArray[cc * 2 + 1] : char_box.right * pText->m_TextState.GetFontSize() / 1000; 86e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT char_top = char_box.top * pText->m_TextState.GetFontSize() / 1000; 87e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT char_bottom = char_box.bottom * pText->m_TextState.GetFontSize() / 1000; 88e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov cc ++; 89e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT char_origx, char_origy; 90e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov matrix.Transform(char_left, 0, char_origx, char_origy); 91e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov matrix.TransformRect(char_left, char_right, char_top, char_bottom); 92e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_ByteString str; 93e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pFont->AppendChar(str, charcode); 94e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov InsertTextBox(NULL, char_origy, char_left, char_right, char_top, 95e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov char_bottom, spacew, fontsize_v, str, pFont); 96e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 97e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pPosArray) { 98e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_Free(pPosArray); 99e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 100e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return; 101e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 102e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT ratio_h = fontsize_h / pText->m_TextState.GetFontSize(); 103e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int ii = 0; ii < count * 2; ii ++) { 104e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pPosArray[ii] *= ratio_h; 105e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 106e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT baseline = pText->m_PosY; 107e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = NULL; 108e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT topy = pText->m_Top; 109e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT bottomy = pText->m_Bottom; 110e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT leftx = pText->m_Left; 111e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int cc = 0; 112e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_ByteString segment; 113e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int space_count = 0; 114e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT last_left = 0, last_right = 0, segment_left = 0, segment_right = 0; 115e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < pText->m_nChars; i ++) { 116e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(FX_UINTPTR)pText->m_pCharCodes : pText->m_pCharCodes[i]; 117e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (charcode == (FX_DWORD) - 1) { 118e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 119e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 120e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT char_left = pPosArray[cc * 2]; 121e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT char_right = pPosArray[cc * 2 + 1]; 122e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov cc ++; 123e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (char_left < last_left || (char_left - last_right) > spacew / 2) { 124e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right, 125e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov topy, bottomy, spacew, fontsize_v, segment, pFont); 126e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov segment_left = char_left; 127e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov segment = ""; 128e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 129e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (space_count > 1) { 130e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right, 131e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov topy, bottomy, spacew, fontsize_v, segment, pFont); 132e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov segment = ""; 133e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (space_count == 1) { 134e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pFont->AppendChar(segment, ' '); 135e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 136e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (segment.GetLength() == 0) { 137e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov segment_left = char_left; 138e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 139e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov segment_right = char_right; 140e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pFont->AppendChar(segment, charcode); 141e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov space_count = 0; 142e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov last_left = char_left; 143e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov last_right = char_right; 144e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 145e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (segment.GetLength()) 146e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right, 147e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov topy, bottomy, spacew, fontsize_v, segment, pFont); 148e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_Free(pPosArray); 149e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 150e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextBaseLine* CTextPage::InsertTextBox(CTextBaseLine* pBaseLine, FX_FLOAT basey, FX_FLOAT leftx, 151e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT bottomy, FX_FLOAT spacew, FX_FLOAT fontsize_v, 152e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_ByteString& str, CPDF_Font* pFont) 153e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 154e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (str.GetLength() == 0) { 155e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return NULL; 156e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 157e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pBaseLine == NULL) { 158e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int i; 159e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 160e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 161e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pExistLine->m_BaseLine == basey) { 162e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine = pExistLine; 163e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 164e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 165e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pExistLine->m_BaseLine < basey) { 166e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 167e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 168e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 169e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pBaseLine == NULL) { 170e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine = new CTextBaseLine; 171e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine->m_BaseLine = basey; 172e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_BaseLines.InsertAt(i, pBaseLine); 173e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 174e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 175e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString text; 176e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_LPCSTR pStr = str; 177e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int len = str.GetLength(), offset = 0; 178e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov while (offset < len) { 179e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); 180e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch); 181e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (unicode_str.IsEmpty()) { 182e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov text += (FX_WCHAR)ch; 183e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 184e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov else { 185e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov text += unicode_str; 186e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 187e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 188e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine->InsertTextBox(leftx, rightx, topy, bottomy, spacew, fontsize_v, text); 189e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return pBaseLine; 190e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 191e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) 192e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 193e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT lastheight = -1; 194e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT lastbaseline = -1; 195e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT MinLeftX = 1000000; 196e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT MaxRightX = 0; 197e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int i; 198e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 199e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 200e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT leftx, rightx; 201e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pBaseLine->GetWidth(leftx, rightx)) { 202e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (leftx < MinLeftX) { 203e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov MinLeftX = leftx; 204e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 205e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (rightx > MaxRightX) { 206e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov MaxRightX = rightx; 207e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 208e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 209e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 210e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 211e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 212e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine->MergeBoxes(); 213e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 214e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 1; i < m_BaseLines.GetSize(); i ++) { 215e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 216e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); 217e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pBaseLine->CanMerge(pPrevLine)) { 218e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pPrevLine->Merge(pBaseLine); 219e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov delete pBaseLine; 220e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_BaseLines.RemoveAt(i); 221e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov i --; 222e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 223e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 224e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_bAutoWidth) { 225e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int* widths = FX_Alloc(int, m_BaseLines.GetSize()); 226e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 227e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov widths[i] = 0; 228e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 229e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int TotalChars = 0; 230e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT TotalWidth = 0; 231e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int minchars; 232e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine->CountChars(TotalChars, TotalWidth, minchars); 233e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (TotalChars) { 234e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT charwidth = TotalWidth / TotalChars; 235e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); 236e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 237e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (widths[i] > 1000) { 238e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov widths[i] = 1000; 239e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 240e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (widths[i] < minchars) { 241e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov widths[i] = minchars; 242e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 243e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 244e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int AvgWidth = 0, widthcount = 0; 245e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) 246e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (widths[i]) { 247e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov AvgWidth += widths[i]; 248e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov widthcount ++; 249e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 250e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5); 251e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int MaxWidth = 0; 252e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) 253e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (MaxWidth < widths[i]) { 254e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov MaxWidth = widths[i]; 255e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 256e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (MaxWidth > AvgWidth * 6 / 5) { 257e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov MaxWidth = AvgWidth * 6 / 5; 258e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 259e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_Free(widths); 260e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (iMinWidth < MaxWidth) { 261e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov iMinWidth = MaxWidth; 262e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 263e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 264e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 265e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 266e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine->MergeBoxes(); 267e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 268e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_bKeepColumn) { 269e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FindColumns(); 270e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 271e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 272e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 273e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (lastheight >= 0) { 274e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; 275e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) { 276e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov lines.Add(L""); 277e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 278e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 279e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov lastheight = pBaseLine->m_MaxFontSizeV; 280e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov lastbaseline = pBaseLine->m_BaseLine; 281e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString str; 282e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); 283e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov lines.Add(str); 284e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 285e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 286e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest) 287e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 288e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); 289e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_LPWSTR pDst = NULL; 290e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 291e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (nCount < 1 ) { 292e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov sDest += wChar; 293e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return; 294e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 295e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pDst = new FX_WCHAR[nCount]; 296e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_Unicode_GetNormalization(wChar, pDst); 297e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int nIndex = 0; nIndex < nCount; nIndex++) { 298e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov sDest += pDst[nIndex]; 299e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 300e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov delete[] pDst; 301e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 302e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid NormalizeString(CFX_WideString& str) 303e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 304e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (str.GetLength() <= 0) { 305e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return; 306e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 307e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString sBuffer; 308e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov IFX_BidiChar* BidiChar = IFX_BidiChar::Create(); 309e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (NULL == BidiChar) { 310e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return; 311e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 312e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WordArray order; 313e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_BOOL bR2L = FALSE; 314e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_INT32 start = 0, count = 0, i = 0; 315e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int nR2L = 0, nL2R = 0; 316e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < str.GetLength(); i++) { 317e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(BidiChar->AppendChar(str.GetAt(i))) { 318e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_INT32 ret = BidiChar->GetBidiInfo(start, count); 319e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov order.Add(start); 320e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov order.Add(count); 321e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov order.Add(ret); 322e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(!bR2L) { 323e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(ret == 2) { 324e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov nR2L++; 325e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (ret == 1) { 326e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov nL2R++; 327e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 328e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 329e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 330e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 331e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(BidiChar->EndChar()) { 332e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_INT32 ret = BidiChar->GetBidiInfo(start, count); 333e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov order.Add(start); 334e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov order.Add(count); 335e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov order.Add(ret); 336e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(!bR2L) { 337e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(ret == 2) { 338e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov nR2L++; 339e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if(ret == 1) { 340e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov nL2R++; 341e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 342e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 343e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 344e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(nR2L > 0 && nR2L >= nL2R) { 345e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov bR2L = TRUE; 346e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 347e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(bR2L) { 348e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int count = order.GetSize(); 349e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(int j = count - 1; j > 0; j -= 3) { 350e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int ret = order.GetAt(j); 351e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int start = order.GetAt(j - 2); 352e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int count1 = order.GetAt(j - 1); 353e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(ret == 2 || ret == 0) { 354e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(int i = start + count1 - 1; i >= start; i--) { 355e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov NormalizeCompositeChar(str[i], sBuffer); 356e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 357e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 358e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov i = j; 359e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_BOOL bSymbol = FALSE; 360e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov while(i > 0 && order.GetAt(i) != 2) { 361e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov bSymbol = !order.GetAt(i); 362e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov i -= 3; 363e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 364e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int end = start + count1 ; 365e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int n = 0; 366e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(bSymbol) { 367e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov n = i + 6; 368e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 369e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov n = i + 3; 370e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 371e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(n >= j) { 372e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(int m = start; m < end; m++) { 373e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov sBuffer += str[m]; 374e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 375e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 376e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov i = j; 377e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov j = n; 378e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(; n <= i; n += 3) { 379e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int start = order.GetAt(n - 2); 380e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int count1 = order.GetAt(n - 1); 381e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int end = start + count1 ; 382e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(int m = start; m < end; m++) { 383e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov sBuffer += str[m]; 384e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 385e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 386e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 387e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 388e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 389e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 390e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int count = order.GetSize(); 391e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_BOOL bL2R = FALSE; 392e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(int j = 0; j < count; j += 3) { 393e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int ret = order.GetAt(j + 2); 394e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int start = order.GetAt(j); 395e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int count1 = order.GetAt(j + 1); 396e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(ret == 2 || (j == 0 && ret == 0 && !bL2R)) { 397e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int i = j + 3; 398e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov while(bR2L && i < count) { 399e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(order.GetAt(i + 2) == 1) { 400e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 401e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 402e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov i += 3; 403e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 404e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 405e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(i == 3) { 406e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov j = -3; 407e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov bL2R = TRUE; 408e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 409e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 410e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int end = str.GetLength() - 1; 411e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if(i < count) { 412e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov end = order.GetAt(i) - 1; 413e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 414e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov j = i - 3; 415e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(int n = end; n >= start; n--) { 416e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov NormalizeCompositeChar(str[i], sBuffer); 417e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 418e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 419e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int end = start + count1 ; 420e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for(int i = start; i < end; i++) { 421e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov sBuffer += str[i]; 422e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 423e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 424e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 425e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 426e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov str.Empty(); 427e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov str += sBuffer; 428e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov BidiChar->Release(); 429e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 430e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovstatic FX_BOOL IsNumber(CFX_WideString& str) 431e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 432e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < str.GetLength(); i ++) { 433e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_WCHAR ch = str[i]; 434e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && ch != ' ') { 435e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 436e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 437e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 438e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return TRUE; 439e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 440e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextPage::FindColumns() 441e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 442e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int i; 443e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 444e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 445e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { 446e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); 447e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextColumn* pColumn = FindColumn(pTextBox->m_Right); 448e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pColumn == NULL) { 449e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pColumn = new CTextColumn; 450e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pColumn->m_Count = 1; 451e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pColumn->m_AvgPos = pTextBox->m_Right; 452e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pColumn->m_TextPos = -1; 453e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_TextColumns.Add(pColumn); 454e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 455e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pColumn->m_AvgPos = (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / 456e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov (pColumn->m_Count + 1); 457e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pColumn->m_Count ++; 458e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 459e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 460e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 461e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int mincount = m_BaseLines.GetSize() / 4; 462e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_TextColumns.GetSize(); i ++) { 463e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); 464e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pTextColumn->m_Count >= mincount) { 465e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 466e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 467e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov delete pTextColumn; 468e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_TextColumns.RemoveAt(i); 469e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov i --; 470e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 471e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_BaseLines.GetSize(); i ++) { 472e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); 473e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { 474e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); 475e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (IsNumber(pTextBox->m_Text)) { 476e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); 477e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 478e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 479e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 480e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 481e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) 482e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 483e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < m_TextColumns.GetSize(); i ++) { 484e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i); 485e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { 486e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return pColumn; 487e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 488e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 489e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return NULL; 490e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 491e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextPage::BreakSpace(CPDF_TextObject* pTextObj) 492e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 493e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 494e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextBaseLine::CTextBaseLine() 495e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 496e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Top = -100000; 497e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Bottom = 100000; 498e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_MaxFontSizeV = 0; 499e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 500e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovCTextBaseLine::~CTextBaseLine() 501e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 502e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < m_TextList.GetSize(); i ++) { 503e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); 504e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov delete pText; 505e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 506e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 507e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::InsertTextBox(FX_FLOAT leftx, FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT bottomy, 508e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT spacew, FX_FLOAT fontsize_v, const CFX_WideString& text) 509e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 510e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_Top < topy) { 511e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Top = topy; 512e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 513e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_Bottom > bottomy) { 514e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_Bottom = bottomy; 515e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 516e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (m_MaxFontSizeV < fontsize_v) { 517e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_MaxFontSizeV = fontsize_v; 518e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 519e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int i; 520e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_TextList.GetSize(); i ++) { 521e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); 522e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pText->m_Left > leftx) { 523e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 524e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 525e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 526e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = new CTextBox; 527e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_Text = text; 528e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_Left = leftx; 529e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_Right = rightx; 530e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_Top = topy; 531e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_Bottom = bottomy; 532e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_SpaceWidth = spacew; 533e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_FontSizeV = fontsize_v; 534e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_pColumn = NULL; 535e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_TextList.InsertAt(i, pText); 536e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 537e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL GetIntersection(FX_FLOAT low1, FX_FLOAT high1, FX_FLOAT low2, FX_FLOAT high2, 538e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT& interlow, FX_FLOAT& interhigh); 539e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther) 540e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 541e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT inter_top, inter_bottom; 542e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top, 543e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov inter_bottom, inter_top)) { 544e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 545e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 546e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT inter_h = inter_top - inter_bottom; 547e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (inter_h < (m_Top - m_Bottom) / 2 && inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) { 548e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 549e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 550e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); 551e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < m_TextList.GetSize(); i ++) { 552e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); 553e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int j = 0; j < pOther->m_TextList.GetSize(); j ++) { 554e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); 555e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT inter_left, inter_right; 556e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (!GetIntersection(pText->m_Left, pText->m_Right, 557e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pOtherText->m_Left, pOtherText->m_Right, inter_left, inter_right)) { 558e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 559e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 560e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT inter_w = inter_right - inter_left; 561e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (inter_w < pText->m_SpaceWidth / 2 && inter_w < pOtherText->m_SpaceWidth / 2) { 562e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 563e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 564e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || 565e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { 566e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 567e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 568e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 569e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 570e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return TRUE; 571e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 572e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::Merge(CTextBaseLine* pOther) 573e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 574e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < pOther->m_TextList.GetSize(); i ++) { 575e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); 576e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom, 577e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); 578e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 579e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 580e6986e1e8d4a57987f47c215490cb080a65ee29aSvet GanovFX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) 581e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 582e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int i; 583e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = 0; i < m_TextList.GetSize(); i ++) { 584e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); 585e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pText->m_Text != L" ") { 586e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 587e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 588e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 589e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (i == m_TextList.GetSize()) { 590e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return FALSE; 591e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 592e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); 593e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov leftx = pText->m_Left; 594e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (i = m_TextList.GetSize() - 1; i >= 0; i --) { 595e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); 596e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pText->m_Text != L" ") { 597e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 598e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 599e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 600e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pText = (CTextBox*)m_TextList.GetAt(i); 601e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov rightx = pText->m_Right; 602e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return TRUE; 603e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 604e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::MergeBoxes() 605e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 606e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int i = 0; 607e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov while (1) { 608e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (i >= m_TextList.GetSize() - 1) { 609e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov break; 610e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 611e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); 612e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); 613e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; 614e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) ? 615e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pNextText->m_SpaceWidth : pThisText->m_SpaceWidth; 616e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (spacew > 0.0 && dx < spacew * 2) { 617e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pThisText->m_Right = pNextText->m_Right; 618e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (dx > spacew * 1.5) { 619e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pThisText->m_Text += L" "; 620e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (dx > spacew / 3) { 621e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pThisText->m_Text += L' '; 622e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 623e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pThisText->m_Text += pNextText->m_Text; 624e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov pThisText->m_SpaceWidth = pNextText->m_SpaceWidth == 0.0 ? 625e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov spacew : pNextText->m_SpaceWidth; 626e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov m_TextList.RemoveAt(i + 1); 627e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov delete pNextText; 628e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 629e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov i ++; 630e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 631e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 632e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 633e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::WriteOutput(CFX_WideString& str, FX_FLOAT leftx, FX_FLOAT pagewidth, 634e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int iTextWidth) 635e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 636e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int lastpos = -1; 637e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < m_TextList.GetSize(); i ++) { 638e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); 639e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int xpos; 640e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pText->m_pColumn) { 641e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov xpos = (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + 0.5); 642e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov xpos -= pText->m_Text.GetLength(); 643e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 644e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5); 645e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 646e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (xpos <= lastpos) { 647e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov xpos = lastpos + 1; 648e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 649e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int j = lastpos + 1; j < xpos; j ++) { 650e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov str += ' '; 651e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 652e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString sSrc(pText->m_Text); 653e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov NormalizeString(sSrc); 654e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov str += sSrc; 655e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov str += ' '; 656e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov lastpos = xpos + pText->m_Text.GetLength(); 657e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 658e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 659e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) 660e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 661e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov minchars = 0; 662e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < m_TextList.GetSize(); i ++) { 663e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); 664e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pText->m_Right - pText->m_Left < 0.002) { 665e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 666e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 667e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov count += pText->m_Text.GetLength(); 668e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov width += pText->m_Right - pText->m_Left; 669e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov minchars += pText->m_Text.GetLength() + 1; 670e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 671e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 672e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov#define PI 3.1415926535897932384626433832795 673e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovstatic void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) 674e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 675e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int total_count = 0, rotated_count[3] = {0, 0, 0}; 676e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_POSITION pos = page.GetFirstObjectPosition(); 677e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov while (pos) { 678e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_PageObject* pObj = page.GetNextObject(pos); 679e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pObj->m_Type != PDFPAGE_TEXT) { 680e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 681e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 682e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov total_count ++; 683e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_TextObject* pText = (CPDF_TextObject*)pObj; 684e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_FLOAT angle = pText->m_TextState.GetBaselineAngle(); 685e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (angle == 0.0) { 686e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 687e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 688e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int degree = (int)(angle * 180 / PI + 0.5); 689e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (degree % 90) { 690e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 691e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 692e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (degree < 0) { 693e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov degree += 360; 694e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 695e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int index = degree / 90 % 3 - 1; 696e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (index < 0) { 697e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 698e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 699e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov rotated_count[index] ++; 700e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 701e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (total_count == 0) { 702e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return; 703e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 704e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_AffineMatrix matrix; 705e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (rotated_count[0] > total_count * 2 / 3) { 706e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov matrix.Set(0, -1, 1, 0, 0, page.GetPageHeight()); 707e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (rotated_count[1] > total_count * 2 / 3) { 708e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov matrix.Set(-1, 0, 0, -1, page.GetPageWidth(), page.GetPageHeight()); 709e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else if (rotated_count[2] > total_count * 2 / 3) { 710e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov matrix.Set(0, 1, -1, 0, page.GetPageWidth(), 0); 711e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } else { 712e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return; 713e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 714e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov page.Transform(matrix); 715e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov page_bbox.Transform(&matrix); 716e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 717e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage, 718e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int iMinWidth, FX_DWORD flags) 719e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 720e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov lines.RemoveAll(); 721e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pPage == NULL) { 722e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov return; 723e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 724e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_Page page; 725e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov page.Load(pDoc, pPage); 726e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_ParseOptions options; 727e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov options.m_bTextOnly = TRUE; 728e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov options.m_bSeparateForm = FALSE; 729e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov page.ParseContent(&options); 730e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_FloatRect page_bbox = page.GetPageBBox(); 731e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (flags & PDF2TXT_AUTO_ROTATE) { 732e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CheckRotate(page, page_bbox); 733e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 734e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CTextPage texts; 735e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH; 736e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN; 737e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov texts.m_bBreakSpace = TRUE; 738e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov FX_POSITION pos = page.GetFirstObjectPosition(); 739e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov while (pos) { 740e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_PageObject* pObject = page.GetNextObject(pos); 741e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) { 742e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right, pObject->m_Top); 743e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (!page_bbox.Contains(rect)) { 744e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov continue; 745e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 746e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 747e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov texts.ProcessObject(pObject); 748e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 749e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov texts.WriteOutput(lines, iMinWidth); 750e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 751e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage, 752e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov int iMinWidth, FX_DWORD flags) 753e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 754e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov lines.RemoveAll(); 755e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideStringArray wlines; 756e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags); 757e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int i = 0; i < wlines.GetSize(); i ++) { 758e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_WideString wstr = wlines[i]; 759e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_ByteString str; 760e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov for (int c = 0; c < wstr.GetLength(); c ++) { 761e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?"); 762e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 763e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov lines.Add(str); 764e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 765e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 766e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovextern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage, FX_BOOL bUseLF, 767e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CFX_PtrArray* pObjArray); 768e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovvoid PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage, FX_DWORD flags) 769e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov{ 770e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov buffer.EstimateSize(0, 10240); 771e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_Page page; 772e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov page.Load(pDoc, pPage); 773e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov CPDF_ParseOptions options; 774e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov options.m_bTextOnly = TRUE; 775e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov options.m_bSeparateForm = FALSE; 776e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov page.ParseContent(&options); 777e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); 778e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 779