1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "xfa/src/foxitlib.h"
8#include "fx_wordbreak_impl.h"
9#define FX_IsOdd(a) ((a)&1)
10FX_WordBreakProp FX_GetWordBreakProperty(FX_WCHAR wcCodePoint) {
11  FX_DWORD dwProperty =
12      (FX_DWORD)gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1];
13  return (FX_WordBreakProp)(FX_IsOdd(wcCodePoint) ? (dwProperty & 0x0F)
14                                                  : (dwProperty >> 4));
15}
16CFX_CharIter::CFX_CharIter(const CFX_WideString& wsText)
17    : m_wsText(wsText), m_nIndex(0) {
18  FXSYS_assert(!wsText.IsEmpty());
19}
20CFX_CharIter::~CFX_CharIter() {}
21void CFX_CharIter::Release() {
22  delete this;
23}
24FX_BOOL CFX_CharIter::Next(FX_BOOL bPrev) {
25  if (bPrev) {
26    if (m_nIndex <= 0) {
27      return FALSE;
28    }
29    m_nIndex--;
30  } else {
31    if (m_nIndex + 1 >= m_wsText.GetLength()) {
32      return FALSE;
33    }
34    m_nIndex++;
35  }
36  return TRUE;
37}
38FX_WCHAR CFX_CharIter::GetChar() {
39  return m_wsText.GetAt(m_nIndex);
40}
41void CFX_CharIter::SetAt(int32_t nIndex) {
42  if (nIndex < 0 || nIndex >= m_wsText.GetLength()) {
43    return;
44  }
45  m_nIndex = nIndex;
46}
47int32_t CFX_CharIter::GetAt() const {
48  return m_nIndex;
49}
50FX_BOOL CFX_CharIter::IsEOF(FX_BOOL bTail) const {
51  return bTail ? (m_nIndex + 1 == m_wsText.GetLength()) : (m_nIndex == 0);
52}
53IFX_CharIter* CFX_CharIter::Clone() {
54  CFX_CharIter* pIter = new CFX_CharIter(m_wsText);
55  pIter->m_nIndex = m_nIndex;
56  return pIter;
57}
58CFX_WordBreak::CFX_WordBreak() : m_pPreIter(NULL), m_pCurIter(NULL) {}
59CFX_WordBreak::~CFX_WordBreak() {
60  if (m_pPreIter) {
61    m_pPreIter->Release();
62    m_pPreIter = NULL;
63  }
64  if (m_pCurIter) {
65    m_pCurIter->Release();
66    m_pCurIter = NULL;
67  }
68}
69void CFX_WordBreak::Release() {
70  delete this;
71}
72void CFX_WordBreak::Attach(IFX_CharIter* pIter) {
73  FXSYS_assert(pIter);
74  m_pCurIter = pIter;
75}
76void CFX_WordBreak::Attach(const CFX_WideString& wsText) {
77  m_pCurIter = new CFX_CharIter(wsText);
78}
79FX_BOOL CFX_WordBreak::Next(FX_BOOL bPrev) {
80  IFX_CharIter* pIter = bPrev ? m_pPreIter->Clone() : m_pCurIter->Clone();
81  if (pIter->IsEOF(!bPrev)) {
82    return FALSE;
83  }
84  pIter->Next(bPrev);
85  if (!FindNextBreakPos(pIter, bPrev, TRUE)) {
86    pIter->Release();
87    return FALSE;
88  }
89  if (bPrev) {
90    m_pCurIter->Release();
91    m_pCurIter = m_pPreIter;
92    m_pCurIter->Next(TRUE);
93    m_pPreIter = pIter;
94  } else {
95    m_pPreIter->Release();
96    m_pPreIter = m_pCurIter;
97    m_pPreIter->Next();
98    m_pCurIter = pIter;
99  }
100  return TRUE;
101}
102void CFX_WordBreak::SetAt(int32_t nIndex) {
103  if (m_pPreIter) {
104    m_pPreIter->Release();
105    m_pPreIter = NULL;
106  }
107  m_pCurIter->SetAt(nIndex);
108  FindNextBreakPos(m_pCurIter, TRUE, FALSE);
109  m_pPreIter = m_pCurIter;
110  m_pCurIter = m_pPreIter->Clone();
111  FindNextBreakPos(m_pCurIter, FALSE, FALSE);
112}
113int32_t CFX_WordBreak::GetWordPos() const {
114  return m_pPreIter->GetAt();
115}
116int32_t CFX_WordBreak::GetWordLength() const {
117  return m_pCurIter->GetAt() - m_pPreIter->GetAt() + 1;
118}
119void CFX_WordBreak::GetWord(CFX_WideString& wsWord) const {
120  int32_t nWordLength = GetWordLength();
121  if (nWordLength <= 0) {
122    return;
123  }
124  FX_WCHAR* lpBuf = wsWord.GetBuffer(nWordLength);
125  IFX_CharIter* pTempIter = m_pPreIter->Clone();
126  int32_t i = 0;
127  while (pTempIter->GetAt() <= m_pCurIter->GetAt()) {
128    lpBuf[i++] = pTempIter->GetChar();
129    FX_BOOL bEnd = pTempIter->Next();
130    if (!bEnd) {
131      break;
132    }
133  }
134  pTempIter->Release();
135  wsWord.ReleaseBuffer(nWordLength);
136}
137FX_BOOL CFX_WordBreak::IsEOF(FX_BOOL bTail) const {
138  return m_pCurIter->IsEOF(bTail);
139}
140FX_BOOL CFX_WordBreak::FindNextBreakPos(IFX_CharIter* pIter,
141                                        FX_BOOL bPrev,
142                                        FX_BOOL bFromNext) {
143  FX_WordBreakProp ePreType = FX_WordBreakProp_None;
144  FX_WordBreakProp eCurType = FX_WordBreakProp_None;
145  FX_WordBreakProp eNextType = FX_WordBreakProp_None;
146  if (pIter->IsEOF(!bPrev)) {
147    return TRUE;
148  }
149  if (!(bFromNext || pIter->IsEOF(bPrev))) {
150    pIter->Next(!bPrev);
151    FX_WCHAR wcTemp = pIter->GetChar();
152    ePreType = FX_GetWordBreakProperty(wcTemp);
153    pIter->Next(bPrev);
154  }
155  FX_WCHAR wcTemp = pIter->GetChar();
156  eCurType = FX_GetWordBreakProperty(wcTemp);
157  FX_BOOL bFirst = TRUE;
158  do {
159    pIter->Next(bPrev);
160    FX_WCHAR wcTemp = pIter->GetChar();
161    eNextType = FX_GetWordBreakProperty(wcTemp);
162    FX_WORD wBreak =
163        gs_FX_WordBreak_Table[eCurType] & ((FX_WORD)(1 << eNextType));
164    if (wBreak) {
165      if (pIter->IsEOF(!bPrev)) {
166        pIter->Next(!bPrev);
167        return TRUE;
168      }
169      if (bFirst) {
170        int32_t nFlags = 0;
171        if (eCurType == FX_WordBreakProp_MidLetter) {
172          if (eNextType == FX_WordBreakProp_ALetter) {
173            nFlags = 1;
174          }
175        } else if (eCurType == FX_WordBreakProp_MidNum) {
176          if (eNextType == FX_WordBreakProp_Numberic) {
177            nFlags = 2;
178          }
179        } else if (eCurType == FX_WordBreakProp_MidNumLet) {
180          if (eNextType == FX_WordBreakProp_ALetter) {
181            nFlags = 1;
182          } else if (eNextType == FX_WordBreakProp_Numberic) {
183            nFlags = 2;
184          }
185        }
186        if (nFlags > 0) {
187          FXSYS_assert(nFlags <= 2);
188          if (!((nFlags == 1 && ePreType == FX_WordBreakProp_ALetter) ||
189                (nFlags == 2 && ePreType == FX_WordBreakProp_Numberic))) {
190            pIter->Next(!bPrev);
191            return TRUE;
192          }
193          pIter->Next(bPrev);
194          wBreak = FALSE;
195        }
196        bFirst = FALSE;
197      }
198      if (wBreak) {
199        int32_t nFlags = 0;
200        if (eNextType == FX_WordBreakProp_MidLetter) {
201          if (eCurType == FX_WordBreakProp_ALetter) {
202            nFlags = 1;
203          }
204        } else if (eNextType == FX_WordBreakProp_MidNum) {
205          if (eCurType == FX_WordBreakProp_Numberic) {
206            nFlags = 2;
207          }
208        } else if (eNextType == FX_WordBreakProp_MidNumLet) {
209          if (eCurType == FX_WordBreakProp_ALetter) {
210            nFlags = 1;
211          } else if (eCurType == FX_WordBreakProp_Numberic) {
212            nFlags = 2;
213          }
214        }
215        if (nFlags <= 0) {
216          pIter->Next(!bPrev);
217          return TRUE;
218        }
219        FXSYS_assert(nFlags <= 2);
220        pIter->Next(bPrev);
221        wcTemp = pIter->GetChar();
222        eNextType = (FX_WordBreakProp)FX_GetWordBreakProperty(wcTemp);
223        if (!((nFlags == 1 && eNextType == FX_WordBreakProp_ALetter) ||
224              (nFlags == 2 && eNextType == FX_WordBreakProp_Numberic))) {
225          pIter->Next(!bPrev);
226          pIter->Next(!bPrev);
227          return TRUE;
228        }
229      }
230    }
231    ePreType = eCurType;
232    eCurType = eNextType;
233    bFirst = FALSE;
234  } while (!pIter->IsEOF(!bPrev));
235  return TRUE;
236}
237IFX_WordBreak* FX_WordBreak_Create() {
238  return new CFX_WordBreak;
239}
240