1// Copyright 2016 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfdoc/ctypeset.h"
8
9#include <algorithm>
10
11#include "core/fpdfdoc/cline.h"
12#include "core/fpdfdoc/cpdf_variabletext.h"
13#include "core/fpdfdoc/cpvt_wordinfo.h"
14#include "core/fpdfdoc/csection.h"
15#include "third_party/base/stl_util.h"
16
namespace {

// Bit flags stored in |special_chars|, one entry per 7-bit code point.
constexpr uint8_t kCharFlagLatin = 0x01;       // Read by IsLatin().
constexpr uint8_t kCharFlagOpenPunct = 0x04;   // Read by IsOpenStylePunctuation().
constexpr uint8_t kCharFlagPunct = 0x08;       // Read by IsPunctuation().
constexpr uint8_t kCharFlagConnective = 0x20;  // Read by IsConnectiveSymbol().
// The table also carries 0x02 (set on '0'..'9') and 0x10 (set on '$'); none of
// the helpers below read those two bits.

// Classification flags for code points 0x00..0x7F, indexed by code point.
const uint8_t special_chars[128] = {
    0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00,
    0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28,
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08,
    0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
};

// Returns true if |word| is treated as a Latin letter: an ASCII code point
// flagged as a letter in |special_chars|, or a code point in one of the
// Latin ranges listed below.
bool IsLatin(uint16_t word) {
  if (word <= 0x007F)
    return (special_chars[word] & kCharFlagLatin) != 0;

  return ((word >= 0x00C0 && word <= 0x00FF) ||   // Latin-1 upper half
          (word >= 0x0100 && word <= 0x024F) ||   // Latin Extended-A/B
          (word >= 0x1E00 && word <= 0x1EFF) ||   // Latin Extended Additional
          (word >= 0x2C60 && word <= 0x2C7F) ||   // Latin Extended-C
          (word >= 0xA720 && word <= 0xA7FF) ||   // Latin Extended-D
          (word >= 0xFF21 && word <= 0xFF3A) ||   // Fullwidth A..Z
          (word >= 0xFF41 && word <= 0xFF5A));    // Fullwidth a..z
}

// Returns true only for the ASCII digits '0'..'9'.
bool IsDigit(uint32_t word) {
  return word >= 0x0030 && word <= 0x0039;
}

// Returns true if |word| is treated as a CJK character. Inside the
// 0x3000..0x303F block only the explicitly listed code points count; the
// rest of that block is left to IsPunctuation() or to no category at all.
bool IsCJK(uint32_t word) {
  if ((word >= 0x1100 && word <= 0x11FF) ||
      (word >= 0x2E80 && word <= 0x2FFF) ||
      (word >= 0x3040 && word <= 0x9FBF) ||
      (word >= 0xAC00 && word <= 0xD7AF) ||
      (word >= 0xF900 && word <= 0xFAFF) ||
      (word >= 0xFE30 && word <= 0xFE4F) ||
      (word >= 0x20000 && word <= 0x2A6DF) ||
      (word >= 0x2F800 && word <= 0x2FA1F)) {
    return true;
  }
  if (word >= 0x3000 && word <= 0x303F) {
    return (
        word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 ||
        word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 ||
        word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 ||
        word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035);
  }
  return word >= 0xFF66 && word <= 0xFF9D;
}

// Returns true if |word| is treated as punctuation. ASCII is answered from
// |special_chars|; every other range uses an explicit list of code points.
bool IsPunctuation(uint32_t word) {
  if (word <= 0x007F)
    return (special_chars[word] & kCharFlagPunct) != 0;

  if (word >= 0x0080 && word <= 0x00FF) {
    // NOTE(review): 0x82..0x96 look like Windows-1252 quote/dash positions --
    // confirm. 0x0094 must be an equality test like its neighbours: a "<="
    // here marks every code in 0x80..0x94 as punctuation, which also hides
    // the currency code 0x0080 from the prefix-symbol check in
    // NeedDivision().
    return (word == 0x0082 || word == 0x0084 || word == 0x0085 ||
            word == 0x0091 || word == 0x0092 || word == 0x0093 ||
            word == 0x0094 || word == 0x0096 || word == 0x00B4 ||
            word == 0x00B8);
  }

  if (word >= 0x2000 && word <= 0x206F) {
    return (
        word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 ||
        word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B ||
        word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F ||
        word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 ||
        word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D ||
        word == 0x203E || word == 0x2044);
  }

  if (word >= 0x3000 && word <= 0x303F) {
    return (
        word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 ||
        word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C ||
        word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 ||
        word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 ||
        word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A ||
        word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F);
  }

  if (word >= 0xFE50 && word <= 0xFE6F)
    return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63;

  if (word >= 0xFF00 && word <= 0xFFEF) {
    return (
        word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 ||
        word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F ||
        word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B ||
        word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C ||
        word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 ||
        word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F);
  }

  return false;
}

// Returns true if |word| is an ASCII code point carrying the connective flag
// in |special_chars|. Non-ASCII code points are never connective.
bool IsConnectiveSymbol(uint32_t word) {
  return word <= 0x007F && (special_chars[word] & kCharFlagConnective) != 0;
}

// Returns true if |word| is opening punctuation: an ASCII code point flagged
// as such in |special_chars|, or one of the listed CJK / fullwidth openers.
bool IsOpenStylePunctuation(uint32_t word) {
  if (word <= 0x007F)
    return (special_chars[word] & kCharFlagOpenPunct) != 0;

  return (word == 0x300A || word == 0x300C || word == 0x300E ||
          word == 0x3010 || word == 0x3014 || word == 0x3016 ||
          word == 0x3018 || word == 0x301A || word == 0xFF08 ||
          word == 0xFF3B || word == 0xFF5B || word == 0xFF62);
}

// Returns true if |word| is one of the listed currency code points or lies in
// 0x20A0..0x20CF.
bool IsCurrencySymbol(uint16_t word) {
  return (word == 0x0024 || word == 0x0080 || word == 0x00A2 ||
          word == 0x00A3 || word == 0x00A4 || word == 0x00A5 ||
          (word >= 0x20A0 && word <= 0x20CF) || word == 0xFE69 ||
          word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 ||
          word == 0xFFE5 || word == 0xFFE6);
}

// Returns true if |word| is a currency symbol or 0x2116.
bool IsPrefixSymbol(uint16_t word) {
  return IsCurrencySymbol(word) || word == 0x2116;
}

// Returns true for the ASCII space (0x0020) and the ideographic space
// (0x3000).
bool IsSpace(uint16_t word) {
  return word == 0x0020 || word == 0x3000;
}

// Decides whether a division is needed between |prevWord| and |curWord|.
// CTypeset::SplitLines() uses a true result to mark |curWord| as the start of
// a new full word. The checks run in priority order and the first one that
// matches decides the answer:
//   1. letter/digit followed by letter/digit       -> no division
//   2. |curWord| is a space or punctuation         -> no division
//   3. either side is a connective symbol          -> no division
//   4. |prevWord| is a space or punctuation        -> division
//   5. |prevWord| is a prefix symbol               -> no division
//   6. |curWord| is a prefix symbol or CJK         -> division
//   7. |prevWord| is CJK                           -> division
// Anything else yields no division.
bool NeedDivision(uint16_t prevWord, uint16_t curWord) {
  if ((IsLatin(prevWord) || IsDigit(prevWord)) &&
      (IsLatin(curWord) || IsDigit(curWord))) {
    return false;
  }
  if (IsSpace(curWord) || IsPunctuation(curWord)) {
    return false;
  }
  if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) {
    return false;
  }
  if (IsSpace(prevWord) || IsPunctuation(prevWord)) {
    return true;
  }
  if (IsPrefixSymbol(prevWord)) {
    return false;
  }
  if (IsPrefixSymbol(curWord) || IsCJK(curWord)) {
    return true;
  }
  if (IsCJK(prevWord)) {
    return true;
  }
  return false;
}

}  // namespace
175
176CTypeset::CTypeset(CSection* pSection)
177    : m_rcRet(0.0f, 0.0f, 0.0f, 0.0f),
178      m_pVT(pSection->m_pVT),
179      m_pSection(pSection) {}
180
181CTypeset::~CTypeset() {}
182
183CPVT_FloatRect CTypeset::CharArray() {
184  m_rcRet = CPVT_FloatRect(0, 0, 0, 0);
185  if (m_pSection->m_LineArray.empty())
186    return m_rcRet;
187
188  float fNodeWidth = m_pVT->GetPlateWidth() /
189                     (m_pVT->GetCharArray() <= 0 ? 1 : m_pVT->GetCharArray());
190  float fLineAscent =
191      m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
192  float fLineDescent =
193      m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
194  float x = 0.0f;
195  float y = m_pVT->GetLineLeading() + fLineAscent;
196  int32_t nStart = 0;
197  CLine* pLine = m_pSection->m_LineArray.front().get();
198  switch (m_pVT->GetAlignment()) {
199    case 0:
200      pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF;
201      break;
202    case 1:
203      nStart = (m_pVT->GetCharArray() -
204                pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray)) /
205               2;
206      pLine->m_LineInfo.fLineX =
207          fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
208      break;
209    case 2:
210      nStart = m_pVT->GetCharArray() -
211               pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
212      pLine->m_LineInfo.fLineX =
213          fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
214      break;
215  }
216  for (int32_t w = 0,
217               sz = pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
218       w < sz; w++) {
219    if (w >= m_pVT->GetCharArray())
220      break;
221
222    float fNextWidth = 0;
223    if (pdfium::IndexInBounds(m_pSection->m_WordArray, w + 1)) {
224      CPVT_WordInfo* pNextWord = m_pSection->m_WordArray[w + 1].get();
225      pNextWord->fWordTail = 0;
226      fNextWidth = m_pVT->GetWordWidth(*pNextWord);
227    }
228    CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
229    pWord->fWordTail = 0;
230    float fWordWidth = m_pVT->GetWordWidth(*pWord);
231    float fWordAscent = m_pVT->GetWordAscent(*pWord);
232    float fWordDescent = m_pVT->GetWordDescent(*pWord);
233    x = (float)(fNodeWidth * (w + nStart + 0.5) -
234                fWordWidth * VARIABLETEXT_HALF);
235    pWord->fWordX = x;
236    pWord->fWordY = y;
237    if (w == 0) {
238      pLine->m_LineInfo.fLineX = x;
239    }
240    if (w != pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1) {
241      pWord->fWordTail =
242          (fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF > 0
243               ? fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF
244               : 0);
245    } else {
246      pWord->fWordTail = 0;
247    }
248    x += fWordWidth;
249    fLineAscent = std::max(fLineAscent, fWordAscent);
250    fLineDescent = std::min(fLineDescent, fWordDescent);
251  }
252  pLine->m_LineInfo.nBeginWordIndex = 0;
253  pLine->m_LineInfo.nEndWordIndex =
254      pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1;
255  pLine->m_LineInfo.fLineY = y;
256  pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX;
257  pLine->m_LineInfo.fLineAscent = fLineAscent;
258  pLine->m_LineInfo.fLineDescent = fLineDescent;
259  m_rcRet = CPVT_FloatRect(0, 0, x, y - fLineDescent);
260  return m_rcRet;
261}
262
263CFX_SizeF CTypeset::GetEditSize(float fFontSize) {
264  ASSERT(m_pSection);
265  ASSERT(m_pVT);
266  SplitLines(false, fFontSize);
267  return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height());
268}
269
270CPVT_FloatRect CTypeset::Typeset() {
271  ASSERT(m_pVT);
272  m_pSection->m_LineArray.clear();
273  SplitLines(true, 0.0f);
274  OutputLines();
275  return m_rcRet;
276}
277
278void CTypeset::SplitLines(bool bTypeset, float fFontSize) {
279  ASSERT(m_pVT);
280  ASSERT(m_pSection);
281  int32_t nLineHead = 0;
282  int32_t nLineTail = 0;
283  float fMaxX = 0.0f, fMaxY = 0.0f;
284  float fLineWidth = 0.0f, fBackupLineWidth = 0.0f;
285  float fLineAscent = 0.0f, fBackupLineAscent = 0.0f;
286  float fLineDescent = 0.0f, fBackupLineDescent = 0.0f;
287  int32_t nWordStartPos = 0;
288  bool bFullWord = false;
289  int32_t nLineFullWordIndex = 0;
290  int32_t nCharIndex = 0;
291  CPVT_LineInfo line;
292  float fWordWidth = 0;
293  float fTypesetWidth =
294      std::max(m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(), 0.0f);
295  int32_t nTotalWords =
296      pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
297  bool bOpened = false;
298  if (nTotalWords > 0) {
299    int32_t i = 0;
300    while (i < nTotalWords) {
301      CPVT_WordInfo* pWord = m_pSection->m_WordArray[i].get();
302      CPVT_WordInfo* pOldWord = pWord;
303      if (i > 0) {
304        pOldWord = m_pSection->m_WordArray[i - 1].get();
305      }
306      if (pWord) {
307        if (bTypeset) {
308          fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord));
309          fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord));
310          fWordWidth = m_pVT->GetWordWidth(*pWord);
311        } else {
312          fLineAscent =
313              std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize));
314          fLineDescent =
315              std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize));
316          fWordWidth = m_pVT->GetWordWidth(
317              pWord->nFontIndex, pWord->Word, m_pVT->GetSubWord(),
318              m_pVT->GetCharSpace(), m_pVT->GetHorzScale(), fFontSize,
319              pWord->fWordTail);
320        }
321        if (!bOpened) {
322          if (IsOpenStylePunctuation(pWord->Word)) {
323            bOpened = true;
324            bFullWord = true;
325          } else if (pOldWord) {
326            if (NeedDivision(pOldWord->Word, pWord->Word)) {
327              bFullWord = true;
328            }
329          }
330        } else {
331          if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) {
332            bOpened = false;
333          }
334        }
335        if (bFullWord) {
336          bFullWord = false;
337          if (nCharIndex > 0) {
338            nLineFullWordIndex++;
339          }
340          nWordStartPos = i;
341          fBackupLineWidth = fLineWidth;
342          fBackupLineAscent = fLineAscent;
343          fBackupLineDescent = fLineDescent;
344        }
345        nCharIndex++;
346      }
347      if (m_pVT->IsAutoReturn() && fTypesetWidth > 0 &&
348          fLineWidth + fWordWidth > fTypesetWidth) {
349        if (nLineFullWordIndex > 0) {
350          i = nWordStartPos;
351          fLineWidth = fBackupLineWidth;
352          fLineAscent = fBackupLineAscent;
353          fLineDescent = fBackupLineDescent;
354        }
355        if (nCharIndex == 1) {
356          fLineWidth = fWordWidth;
357          i++;
358        }
359        nLineTail = i - 1;
360        if (bTypeset) {
361          line.nBeginWordIndex = nLineHead;
362          line.nEndWordIndex = nLineTail;
363          line.nTotalWord = nLineTail - nLineHead + 1;
364          line.fLineWidth = fLineWidth;
365          line.fLineAscent = fLineAscent;
366          line.fLineDescent = fLineDescent;
367          m_pSection->AddLine(line);
368        }
369        fMaxY += (fLineAscent + m_pVT->GetLineLeading());
370        fMaxY -= fLineDescent;
371        fMaxX = std::max(fLineWidth, fMaxX);
372        nLineHead = i;
373        fLineWidth = 0.0f;
374        fLineAscent = 0.0f;
375        fLineDescent = 0.0f;
376        nCharIndex = 0;
377        nLineFullWordIndex = 0;
378        bFullWord = false;
379      } else {
380        fLineWidth += fWordWidth;
381        i++;
382      }
383    }
384    if (nLineHead <= nTotalWords - 1) {
385      nLineTail = nTotalWords - 1;
386      if (bTypeset) {
387        line.nBeginWordIndex = nLineHead;
388        line.nEndWordIndex = nLineTail;
389        line.nTotalWord = nLineTail - nLineHead + 1;
390        line.fLineWidth = fLineWidth;
391        line.fLineAscent = fLineAscent;
392        line.fLineDescent = fLineDescent;
393        m_pSection->AddLine(line);
394      }
395      fMaxY += (fLineAscent + m_pVT->GetLineLeading());
396      fMaxY -= fLineDescent;
397      fMaxX = std::max(fLineWidth, fMaxX);
398    }
399  } else {
400    if (bTypeset) {
401      fLineAscent = m_pVT->GetLineAscent();
402      fLineDescent = m_pVT->GetLineDescent();
403    } else {
404      fLineAscent =
405          m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize);
406      fLineDescent =
407          m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize);
408    }
409    if (bTypeset) {
410      line.nBeginWordIndex = -1;
411      line.nEndWordIndex = -1;
412      line.nTotalWord = 0;
413      line.fLineWidth = 0;
414      line.fLineAscent = fLineAscent;
415      line.fLineDescent = fLineDescent;
416      m_pSection->AddLine(line);
417    }
418    fMaxY += m_pVT->GetLineLeading() + fLineAscent - fLineDescent;
419  }
420  m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY);
421}
422
423void CTypeset::OutputLines() {
424  ASSERT(m_pVT);
425  ASSERT(m_pSection);
426  float fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f;
427  float fPosX = 0.0f, fPosY = 0.0f;
428  float fLineIndent = m_pVT->GetLineIndent();
429  float fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f);
430  switch (m_pVT->GetAlignment()) {
431    default:
432    case 0:
433      fMinX = 0.0f;
434      break;
435    case 1:
436      fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF;
437      break;
438    case 2:
439      fMinX = fTypesetWidth - m_rcRet.Width();
440      break;
441  }
442  fMaxX = fMinX + m_rcRet.Width();
443  fMinY = 0.0f;
444  fMaxY = m_rcRet.Height();
445  int32_t nTotalLines =
446      pdfium::CollectionSize<int32_t>(m_pSection->m_LineArray);
447  if (nTotalLines > 0) {
448    for (int32_t l = 0; l < nTotalLines; l++) {
449      CLine* pLine = m_pSection->m_LineArray[l].get();
450      switch (m_pVT->GetAlignment()) {
451        default:
452        case 0:
453          fPosX = 0;
454          break;
455        case 1:
456          fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) *
457                  VARIABLETEXT_HALF;
458          break;
459        case 2:
460          fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth;
461          break;
462      }
463      fPosX += fLineIndent;
464      fPosY += m_pVT->GetLineLeading();
465      fPosY += pLine->m_LineInfo.fLineAscent;
466      pLine->m_LineInfo.fLineX = fPosX - fMinX;
467      pLine->m_LineInfo.fLineY = fPosY - fMinY;
468      for (int32_t w = pLine->m_LineInfo.nBeginWordIndex;
469           w <= pLine->m_LineInfo.nEndWordIndex; w++) {
470        if (pdfium::IndexInBounds(m_pSection->m_WordArray, w)) {
471          CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
472          pWord->fWordX = fPosX - fMinX;
473          pWord->fWordY = fPosY - fMinY;
474
475          fPosX += m_pVT->GetWordWidth(*pWord);
476        }
477      }
478      fPosY -= pLine->m_LineInfo.fLineDescent;
479    }
480  }
481  m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY);
482}
483