LineBreaker.cpp revision bb86b433f97a301c11800806b1ce5331fa227d4a
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define VERBOSE_DEBUG 0
18
19#include <limits>
20
21#define LOG_TAG "Minikin"
22#include <cutils/log.h>
23
24#include <minikin/Layout.h>
25#include <minikin/LineBreaker.h>
26
27using std::vector;
28
29namespace android {
30
31const int CHAR_TAB = 0x0009;
32const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
33
34// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
35// constants are larger than any reasonable actual width score.
36const float SCORE_INFTY = std::numeric_limits<float>::max();
37const float SCORE_OVERFULL = 1e12f;
38const float SCORE_DESPERATE = 1e10f;
39
40// When the text buffer is within this limit, capacity of vectors is retained at finish(),
41// to avoid allocation.
42const size_t MAX_TEXT_BUF_RETAIN = 32678;
43
44void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
45    delete mBreakIterator;
46    UErrorCode status = U_ZERO_ERROR;
47    mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
48    // TODO: check status
49
50    // TODO: load actual resource dependent on locale; letting Minikin do it is a hack
51    mHyphenator = hyphenator;
52}
53
54void LineBreaker::setText() {
55    UErrorCode status = U_ZERO_ERROR;
56    utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status);
57    mBreakIterator->setText(&mUText, status);
58    mBreakIterator->first();
59
60    // handle initial break here because addStyleRun may never be called
61    mBreakIterator->next();
62    mCandidates.clear();
63    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0};
64    mCandidates.push_back(cand);
65
66    // reset greedy breaker state
67    mBreaks.clear();
68    mWidths.clear();
69    mFlags.clear();
70    mLastBreak = 0;
71    mBestBreak = 0;
72    mBestScore = SCORE_INFTY;
73    mPreBreak = 0;
74    mFirstTabIndex = INT_MAX;
75}
76
77void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
78    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
79}
80
81
82void LineBreaker::setIndents(const std::vector<float>& indents) {
83    mLineWidths.setIndents(indents);
84}
85
86// This function determines whether a character is a space that disappears at end of line.
87// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]]
88// Note: all such characters are in the BMP, so it's ok to use code units for this.
89static bool isLineEndSpace(uint16_t c) {
90    return c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || c == 0x205F ||
91            c == 0x3000;
92}
93
94// Ordinarily, this method measures the text in the range given. However, when paint
95// is nullptr, it assumes the widths have already been calculated and stored in the
96// width buffer.
97// This method finds the candidate word breaks (using the ICU break iterator) and sends them
98// to addCandidate.
99float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface,
100        FontStyle style, size_t start, size_t end, bool isRtl) {
101    Layout layout;  // performance TODO: move layout to self object to reduce allocation cost?
102    float width = 0.0f;
103    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
104
105    float hyphenPenalty = 0.0;
106    if (paint != nullptr) {
107        layout.setFontCollection(typeface);
108        layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style,
109                *paint);
110        layout.getAdvances(mCharWidths.data() + start);
111        width = layout.getAdvance();
112
113        // a heuristic that seems to perform well
114        hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
115    }
116
117    size_t current = (size_t)mBreakIterator->current();
118    size_t wordEnd = start;
119    size_t lastBreak = start;
120    ParaWidth lastBreakWidth = mWidth;
121    ParaWidth postBreak = mWidth;
122    for (size_t i = start; i < end; i++) {
123        uint16_t c = mTextBuf[i];
124        if (c == CHAR_TAB) {
125            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
126            if (mFirstTabIndex == INT_MAX) {
127                mFirstTabIndex = (int)i;
128            }
129            // fall back to greedy; other modes don't know how to deal with tabs
130            mStrategy = kBreakStrategy_Greedy;
131        } else {
132            mWidth += mCharWidths[i];
133            if (!isLineEndSpace(c)) {
134                postBreak = mWidth;
135                wordEnd = i + 1;
136            }
137        }
138        if (i + 1 == current) {
139            // Override ICU's treatment of soft hyphen as a break opportunity, because we want it
140            // to be a hyphen break, with penalty and drawing behavior.
141            if (c != CHAR_SOFT_HYPHEN) {
142                if (paint != nullptr && mHyphenator != nullptr && wordEnd > lastBreak) {
143                    mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak);
144    #if VERBOSE_DEBUG
145                    std::string hyphenatedString;
146                    for (size_t j = lastBreak; j < wordEnd; j++) {
147                        if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-');
148                        // Note: only works with ASCII, should do UTF-8 conversion here
149                        hyphenatedString.push_back(buffer()[j]);
150                    }
151                    ALOGD("hyphenated string: %s", hyphenatedString.c_str());
152    #endif
153
154                    // measure hyphenated substrings
155                    for (size_t j = lastBreak; j < wordEnd; j++) {
156                        uint8_t hyph = mHyphBuf[j - lastBreak];
157                        if (hyph) {
158                            paint->hyphenEdit = hyph;
159                            layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak,
160                                    mTextBuf.size(), bidiFlags, style, *paint);
161                            ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance();
162                            paint->hyphenEdit = 0;
163                            layout.doLayout(mTextBuf.data(), j, wordEnd - j,
164                                    mTextBuf.size(), bidiFlags, style, *paint);
165                            ParaWidth hyphPreBreak = postBreak - layout.getAdvance();
166                            addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph);
167                        }
168                    }
169                }
170
171                // Skip break for zero-width characters.
172                if (current == mTextBuf.size() || mCharWidths[current] > 0) {
173                    addWordBreak(current, mWidth, postBreak, 0.0, 0);
174                }
175                lastBreak = current;
176                lastBreakWidth = mWidth;
177            }
178            current = (size_t)mBreakIterator->next();
179        }
180    }
181
182    return width;
183}
184
185// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
186// needed (ie when word exceeds current line width)
187void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
188        float penalty, uint8_t hyph) {
189    Candidate cand;
190    ParaWidth width = mCandidates.back().preBreak;
191    if (postBreak - width > currentLineWidth()) {
192        // Add desperate breaks.
193        // Note: these breaks are based on the shaping of the (non-broken) original text; they
194        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
195        size_t i = mCandidates.back().offset;
196        width += mCharWidths[i++];
197        for (; i < offset; i++) {
198            float w = mCharWidths[i];
199            if (w > 0) {
200                cand.offset = i;
201                cand.preBreak = width;
202                cand.postBreak = width;
203                cand.penalty = SCORE_DESPERATE;
204                cand.hyphenEdit = 0;
205#if VERBOSE_DEBUG
206                ALOGD("desperate cand: %d %g:%g",
207                        mCandidates.size(), cand.postBreak, cand.preBreak);
208#endif
209                addCandidate(cand);
210                width += w;
211            }
212        }
213    }
214
215    cand.offset = offset;
216    cand.preBreak = preBreak;
217    cand.postBreak = postBreak;
218    cand.penalty = penalty;
219    cand.hyphenEdit = hyph;
220#if VERBOSE_DEBUG
221    ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
222#endif
223    addCandidate(cand);
224}
225
226// TODO: for justified text, refine with shrink/stretch
227float LineBreaker::computeScore(float delta, bool atEnd) {
228    if (delta < 0) {
229        return SCORE_OVERFULL;
230    } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
231        return 0.0;
232    } else {
233        return delta * delta;
234    }
235}
236
237// TODO performance: could avoid populating mCandidates if greedy only
238void LineBreaker::addCandidate(Candidate cand) {
239    size_t candIndex = mCandidates.size();
240    mCandidates.push_back(cand);
241    if (cand.postBreak - mPreBreak > currentLineWidth()) {
242        // This break would create an overfull line, pick the best break and break there (greedy)
243        if (mBestBreak == mLastBreak) {
244            mBestBreak = candIndex;
245        }
246        pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak,
247                mCandidates[mBestBreak].hyphenEdit);
248        mBestScore = SCORE_INFTY;
249#if VERBOSE_DEBUG
250        ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
251#endif
252        mLastBreak = mBestBreak;
253        mPreBreak = mCandidates[mBestBreak].preBreak;
254    }
255    if (cand.penalty <= mBestScore) {
256        mBestBreak = candIndex;
257        mBestScore = cand.penalty;
258    }
259}
260
261void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) {
262    mBreaks.push_back(offset);
263    mWidths.push_back(width);
264    int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
265    flags |= hyph;
266    mFlags.push_back(flags);
267    mFirstTabIndex = INT_MAX;
268}
269
270void LineBreaker::addReplacement(size_t start, size_t end, float width) {
271    mCharWidths[start] = width;
272    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
273    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
274}
275
276float LineBreaker::currentLineWidth() const {
277    return mLineWidths.getLineWidth(mBreaks.size());
278}
279
280void LineBreaker::computeBreaksGreedy() {
281    // All breaks but the last have been added in addCandidate already.
282    size_t nCand = mCandidates.size();
283    if (nCand == 1 || mLastBreak != nCand - 1) {
284        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0);
285        // don't need to update mBestScore, because we're done
286#if VERBOSE_DEBUG
287        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
288#endif
289    }
290}
291
292// Follow "prev" links in mCandidates array, and copy to result arrays.
293void LineBreaker::finishBreaksOptimal() {
294    // clear existing greedy break result
295    mBreaks.clear();
296    mWidths.clear();
297    mFlags.clear();
298    size_t nCand = mCandidates.size();
299    size_t prev;
300    for (size_t i = nCand - 1; i > 0; i = prev) {
301        prev = mCandidates[i].prev;
302        mBreaks.push_back(mCandidates[i].offset);
303        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
304        mFlags.push_back(mCandidates[i].hyphenEdit);
305    }
306    std::reverse(mBreaks.begin(), mBreaks.end());
307    std::reverse(mWidths.begin(), mWidths.end());
308    std::reverse(mFlags.begin(), mFlags.end());
309}
310
311void LineBreaker::computeBreaksOptimal() {
312    size_t active = 0;
313    size_t nCand = mCandidates.size();
314    for (size_t i = 1; i < nCand; i++) {
315        bool atEnd = i == nCand - 1;
316        float best = SCORE_INFTY;
317        size_t bestPrev = 0;
318
319        size_t lineNumberLast = mCandidates[active].lineNumber;
320        float width = mLineWidths.getLineWidth(lineNumberLast);
321        ParaWidth leftEdge = mCandidates[i].postBreak - width;
322        float bestHope = 0;
323
324        for (size_t j = active; j < i; j++) {
325            size_t lineNumber = mCandidates[j].lineNumber;
326            if (lineNumber != lineNumberLast) {
327                float widthNew = mLineWidths.getLineWidth(lineNumber);
328                if (widthNew != width) {
329                    leftEdge = mCandidates[i].postBreak - width;
330                    bestHope = 0;
331                    width = widthNew;
332                }
333                lineNumberLast = lineNumber;
334            }
335            float jScore = mCandidates[j].score;
336            if (jScore + bestHope >= best) continue;
337            float delta = mCandidates[j].preBreak - leftEdge;
338
339            float widthScore = computeScore(delta, atEnd);
340            if (delta < 0) {
341                active = j + 1;
342            } else {
343                bestHope = widthScore;
344            }
345
346            float score = jScore + widthScore;
347            if (score <= best) {
348                best = score;
349                bestPrev = j;
350            }
351        }
352        mCandidates[i].score = best + mCandidates[i].penalty;
353        mCandidates[i].prev = bestPrev;
354        mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
355    }
356    finishBreaksOptimal();
357}
358
359void LineBreaker::computeBreaksOptimalRect() {
360    size_t active = 0;
361    size_t nCand = mCandidates.size();
362    float width = mLineWidths.getLineWidth(0);
363    for (size_t i = 1; i < nCand; i++) {
364        bool atEnd = i == nCand - 1;
365        float best = SCORE_INFTY;
366        size_t bestPrev = 0;
367
368        // Width-based component of score increases as line gets shorter, so score will always be
369        // at least this.
370        float bestHope = 0;
371
372        ParaWidth leftEdge = mCandidates[i].postBreak - width;
373        for (size_t j = active; j < i; j++) {
374            // TODO performance: can break if bestHope >= best; worth it?
375            float jScore = mCandidates[j].score;
376            if (jScore + bestHope >= best) continue;
377            float delta = mCandidates[j].preBreak - leftEdge;
378
379            float widthScore = computeScore(delta, atEnd);
380            if (delta < 0) {
381                active = j + 1;
382            } else {
383                bestHope = widthScore;
384            }
385
386            float score = jScore + widthScore;
387            if (score <= best) {
388                best = score;
389                bestPrev = j;
390            }
391        }
392        mCandidates[i].score = best + mCandidates[i].penalty;
393        mCandidates[i].prev = bestPrev;
394    }
395    finishBreaksOptimal();
396}
397
398size_t LineBreaker::computeBreaks() {
399    if (mStrategy == kBreakStrategy_Greedy) {
400        computeBreaksGreedy();
401    } else if (mLineWidths.isConstant()) {
402        computeBreaksOptimalRect();
403    } else {
404        computeBreaksOptimal();
405    }
406    return mBreaks.size();
407}
408
409void LineBreaker::finish() {
410    mWidth = 0;
411    mCandidates.clear();
412    mBreaks.clear();
413    mWidths.clear();
414    mFlags.clear();
415    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
416        mTextBuf.clear();
417        mTextBuf.shrink_to_fit();
418        mCharWidths.clear();
419        mCharWidths.shrink_to_fit();
420        mHyphBuf.clear();
421        mHyphBuf.shrink_to_fit();
422        mCandidates.shrink_to_fit();
423        mBreaks.shrink_to_fit();
424        mWidths.shrink_to_fit();
425        mFlags.shrink_to_fit();
426    }
427    mStrategy = kBreakStrategy_Greedy;
428}
429
430}  // namespace android
431