LineBreaker.cpp revision 68ae337aa05f598af18b09fe4125146d4d67db83
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define VERBOSE_DEBUG 0
18
19#include <limits>
20
21#define LOG_TAG "Minikin"
22#include <cutils/log.h>
23
24#include <minikin/Layout.h>
25#include <minikin/LineBreaker.h>
26
27using std::vector;
28
29namespace android {
30
31const int CHAR_TAB = 0x0009;
32const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
33
34// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
35// constants are larger than any reasonable actual width score.
36const float SCORE_INFTY = std::numeric_limits<float>::max();
37const float SCORE_OVERFULL = 1e12f;
38const float SCORE_DESPERATE = 1e10f;
39
40// When the text buffer is within this limit, capacity of vectors is retained at finish(),
41// to avoid allocation.
42const size_t MAX_TEXT_BUF_RETAIN = 32678;
43
44void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
45    delete mBreakIterator;
46    UErrorCode status = U_ZERO_ERROR;
47    mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
48    // TODO: check status
49
50    // TODO: load actual resource dependent on locale; letting Minikin do it is a hack
51    mHyphenator = hyphenator;
52}
53
54void LineBreaker::setText() {
55    UErrorCode status = U_ZERO_ERROR;
56    utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status);
57    mBreakIterator->setText(&mUText, status);
58    mBreakIterator->first();
59
60    // handle initial break here because addStyleRun may never be called
61    mBreakIterator->next();
62    mCandidates.clear();
63    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0};
64    mCandidates.push_back(cand);
65
66    // reset greedy breaker state
67    mBreaks.clear();
68    mWidths.clear();
69    mFlags.clear();
70    mLastBreak = 0;
71    mBestBreak = 0;
72    mBestScore = SCORE_INFTY;
73    mPreBreak = 0;
74    mFirstTabIndex = INT_MAX;
75}
76
77void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
78    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
79}
80
81
82void LineBreaker::setIndents(const std::vector<float>& indents) {
83    mLineWidths.setIndents(indents);
84}
85
86// This function determines whether a character is a space that disappears at end of line.
87// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
88// plus '\n'.
89// Note: all such characters are in the BMP, so it's ok to use code units for this.
90static bool isLineEndSpace(uint16_t c) {
91    return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
92            c == 0x205F || c == 0x3000;
93}
94
95// Ordinarily, this method measures the text in the range given. However, when paint
96// is nullptr, it assumes the widths have already been calculated and stored in the
97// width buffer.
98// This method finds the candidate word breaks (using the ICU break iterator) and sends them
99// to addCandidate.
100float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface,
101        FontStyle style, size_t start, size_t end, bool isRtl) {
102    Layout layout;  // performance TODO: move layout to self object to reduce allocation cost?
103    float width = 0.0f;
104    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
105
106    float hyphenPenalty = 0.0;
107    if (paint != nullptr) {
108        layout.setFontCollection(typeface);
109        layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style,
110                *paint);
111        layout.getAdvances(mCharWidths.data() + start);
112        width = layout.getAdvance();
113
114        // a heuristic that seems to perform well
115        hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
116    }
117
118    size_t current = (size_t)mBreakIterator->current();
119    size_t wordEnd = start;
120    size_t lastBreak = start;
121    ParaWidth lastBreakWidth = mWidth;
122    ParaWidth postBreak = mWidth;
123    for (size_t i = start; i < end; i++) {
124        uint16_t c = mTextBuf[i];
125        if (c == CHAR_TAB) {
126            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
127            if (mFirstTabIndex == INT_MAX) {
128                mFirstTabIndex = (int)i;
129            }
130            // fall back to greedy; other modes don't know how to deal with tabs
131            mStrategy = kBreakStrategy_Greedy;
132        } else {
133            mWidth += mCharWidths[i];
134            if (!isLineEndSpace(c)) {
135                postBreak = mWidth;
136                wordEnd = i + 1;
137            }
138        }
139        if (i + 1 == current) {
140            // Override ICU's treatment of soft hyphen as a break opportunity, because we want it
141            // to be a hyphen break, with penalty and drawing behavior.
142            if (c != CHAR_SOFT_HYPHEN) {
143                if (paint != nullptr && mHyphenator != nullptr && wordEnd > lastBreak) {
144                    mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak);
145    #if VERBOSE_DEBUG
146                    std::string hyphenatedString;
147                    for (size_t j = lastBreak; j < wordEnd; j++) {
148                        if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-');
149                        // Note: only works with ASCII, should do UTF-8 conversion here
150                        hyphenatedString.push_back(buffer()[j]);
151                    }
152                    ALOGD("hyphenated string: %s", hyphenatedString.c_str());
153    #endif
154
155                    // measure hyphenated substrings
156                    for (size_t j = lastBreak; j < wordEnd; j++) {
157                        uint8_t hyph = mHyphBuf[j - lastBreak];
158                        if (hyph) {
159                            paint->hyphenEdit = hyph;
160                            layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak,
161                                    mTextBuf.size(), bidiFlags, style, *paint);
162                            ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance();
163                            paint->hyphenEdit = 0;
164                            layout.doLayout(mTextBuf.data(), j, wordEnd - j,
165                                    mTextBuf.size(), bidiFlags, style, *paint);
166                            ParaWidth hyphPreBreak = postBreak - layout.getAdvance();
167                            addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph);
168                        }
169                    }
170                }
171
172                // Skip break for zero-width characters.
173                if (current == mTextBuf.size() || mCharWidths[current] > 0) {
174                    addWordBreak(current, mWidth, postBreak, 0.0, 0);
175                }
176                lastBreak = current;
177                lastBreakWidth = mWidth;
178            }
179            current = (size_t)mBreakIterator->next();
180        }
181    }
182
183    return width;
184}
185
186// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
187// needed (ie when word exceeds current line width)
188void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
189        float penalty, uint8_t hyph) {
190    Candidate cand;
191    ParaWidth width = mCandidates.back().preBreak;
192    if (postBreak - width > currentLineWidth()) {
193        // Add desperate breaks.
194        // Note: these breaks are based on the shaping of the (non-broken) original text; they
195        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
196        size_t i = mCandidates.back().offset;
197        width += mCharWidths[i++];
198        for (; i < offset; i++) {
199            float w = mCharWidths[i];
200            if (w > 0) {
201                cand.offset = i;
202                cand.preBreak = width;
203                cand.postBreak = width;
204                cand.penalty = SCORE_DESPERATE;
205                cand.hyphenEdit = 0;
206#if VERBOSE_DEBUG
207                ALOGD("desperate cand: %d %g:%g",
208                        mCandidates.size(), cand.postBreak, cand.preBreak);
209#endif
210                addCandidate(cand);
211                width += w;
212            }
213        }
214    }
215
216    cand.offset = offset;
217    cand.preBreak = preBreak;
218    cand.postBreak = postBreak;
219    cand.penalty = penalty;
220    cand.hyphenEdit = hyph;
221#if VERBOSE_DEBUG
222    ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
223#endif
224    addCandidate(cand);
225}
226
227// TODO: for justified text, refine with shrink/stretch
228float LineBreaker::computeScore(float delta, bool atEnd) {
229    if (delta < 0) {
230        return SCORE_OVERFULL;
231    } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
232        return 0.0;
233    } else {
234        return delta * delta;
235    }
236}
237
238// TODO performance: could avoid populating mCandidates if greedy only
239void LineBreaker::addCandidate(Candidate cand) {
240    size_t candIndex = mCandidates.size();
241    mCandidates.push_back(cand);
242    if (cand.postBreak - mPreBreak > currentLineWidth()) {
243        // This break would create an overfull line, pick the best break and break there (greedy)
244        if (mBestBreak == mLastBreak) {
245            mBestBreak = candIndex;
246        }
247        pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak,
248                mCandidates[mBestBreak].hyphenEdit);
249        mBestScore = SCORE_INFTY;
250#if VERBOSE_DEBUG
251        ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
252#endif
253        mLastBreak = mBestBreak;
254        mPreBreak = mCandidates[mBestBreak].preBreak;
255    }
256    if (cand.penalty <= mBestScore) {
257        mBestBreak = candIndex;
258        mBestScore = cand.penalty;
259    }
260}
261
262void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) {
263    mBreaks.push_back(offset);
264    mWidths.push_back(width);
265    int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
266    flags |= hyph;
267    mFlags.push_back(flags);
268    mFirstTabIndex = INT_MAX;
269}
270
271void LineBreaker::addReplacement(size_t start, size_t end, float width) {
272    mCharWidths[start] = width;
273    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
274    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
275}
276
277float LineBreaker::currentLineWidth() const {
278    return mLineWidths.getLineWidth(mBreaks.size());
279}
280
281void LineBreaker::computeBreaksGreedy() {
282    // All breaks but the last have been added in addCandidate already.
283    size_t nCand = mCandidates.size();
284    if (nCand == 1 || mLastBreak != nCand - 1) {
285        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0);
286        // don't need to update mBestScore, because we're done
287#if VERBOSE_DEBUG
288        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
289#endif
290    }
291}
292
293// Follow "prev" links in mCandidates array, and copy to result arrays.
294void LineBreaker::finishBreaksOptimal() {
295    // clear existing greedy break result
296    mBreaks.clear();
297    mWidths.clear();
298    mFlags.clear();
299    size_t nCand = mCandidates.size();
300    size_t prev;
301    for (size_t i = nCand - 1; i > 0; i = prev) {
302        prev = mCandidates[i].prev;
303        mBreaks.push_back(mCandidates[i].offset);
304        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
305        mFlags.push_back(mCandidates[i].hyphenEdit);
306    }
307    std::reverse(mBreaks.begin(), mBreaks.end());
308    std::reverse(mWidths.begin(), mWidths.end());
309    std::reverse(mFlags.begin(), mFlags.end());
310}
311
312void LineBreaker::computeBreaksOptimal() {
313    size_t active = 0;
314    size_t nCand = mCandidates.size();
315    for (size_t i = 1; i < nCand; i++) {
316        bool atEnd = i == nCand - 1;
317        float best = SCORE_INFTY;
318        size_t bestPrev = 0;
319
320        size_t lineNumberLast = mCandidates[active].lineNumber;
321        float width = mLineWidths.getLineWidth(lineNumberLast);
322        ParaWidth leftEdge = mCandidates[i].postBreak - width;
323        float bestHope = 0;
324
325        for (size_t j = active; j < i; j++) {
326            size_t lineNumber = mCandidates[j].lineNumber;
327            if (lineNumber != lineNumberLast) {
328                float widthNew = mLineWidths.getLineWidth(lineNumber);
329                if (widthNew != width) {
330                    leftEdge = mCandidates[i].postBreak - width;
331                    bestHope = 0;
332                    width = widthNew;
333                }
334                lineNumberLast = lineNumber;
335            }
336            float jScore = mCandidates[j].score;
337            if (jScore + bestHope >= best) continue;
338            float delta = mCandidates[j].preBreak - leftEdge;
339
340            float widthScore = computeScore(delta, atEnd);
341            if (delta < 0) {
342                active = j + 1;
343            } else {
344                bestHope = widthScore;
345            }
346
347            float score = jScore + widthScore;
348            if (score <= best) {
349                best = score;
350                bestPrev = j;
351            }
352        }
353        mCandidates[i].score = best + mCandidates[i].penalty;
354        mCandidates[i].prev = bestPrev;
355        mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
356    }
357    finishBreaksOptimal();
358}
359
360void LineBreaker::computeBreaksOptimalRect() {
361    size_t active = 0;
362    size_t nCand = mCandidates.size();
363    float width = mLineWidths.getLineWidth(0);
364    for (size_t i = 1; i < nCand; i++) {
365        bool atEnd = i == nCand - 1;
366        float best = SCORE_INFTY;
367        size_t bestPrev = 0;
368
369        // Width-based component of score increases as line gets shorter, so score will always be
370        // at least this.
371        float bestHope = 0;
372
373        ParaWidth leftEdge = mCandidates[i].postBreak - width;
374        for (size_t j = active; j < i; j++) {
375            // TODO performance: can break if bestHope >= best; worth it?
376            float jScore = mCandidates[j].score;
377            if (jScore + bestHope >= best) continue;
378            float delta = mCandidates[j].preBreak - leftEdge;
379
380            float widthScore = computeScore(delta, atEnd);
381            if (delta < 0) {
382                active = j + 1;
383            } else {
384                bestHope = widthScore;
385            }
386
387            float score = jScore + widthScore;
388            if (score <= best) {
389                best = score;
390                bestPrev = j;
391            }
392        }
393        mCandidates[i].score = best + mCandidates[i].penalty;
394        mCandidates[i].prev = bestPrev;
395    }
396    finishBreaksOptimal();
397}
398
399size_t LineBreaker::computeBreaks() {
400    if (mStrategy == kBreakStrategy_Greedy) {
401        computeBreaksGreedy();
402    } else if (mLineWidths.isConstant()) {
403        computeBreaksOptimalRect();
404    } else {
405        computeBreaksOptimal();
406    }
407    return mBreaks.size();
408}
409
410void LineBreaker::finish() {
411    mWidth = 0;
412    mCandidates.clear();
413    mBreaks.clear();
414    mWidths.clear();
415    mFlags.clear();
416    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
417        mTextBuf.clear();
418        mTextBuf.shrink_to_fit();
419        mCharWidths.clear();
420        mCharWidths.shrink_to_fit();
421        mHyphBuf.clear();
422        mHyphBuf.shrink_to_fit();
423        mCandidates.shrink_to_fit();
424        mBreaks.shrink_to_fit();
425        mWidths.shrink_to_fit();
426        mFlags.shrink_to_fit();
427    }
428    mStrategy = kBreakStrategy_Greedy;
429}
430
431}  // namespace android
432