LineBreaker.cpp revision 5cdad92c300a65cab89b172e952186f0c5870657
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define VERBOSE_DEBUG 0
18
19#include <limits>
20
21#define LOG_TAG "Minikin"
22#include <cutils/log.h>
23
24#include <minikin/Layout.h>
25#include <minikin/LineBreaker.h>
26
27using std::vector;
28
29namespace android {
30
31const int CHAR_TAB = 0x0009;
32const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
33
34// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
35// constants are larger than any reasonable actual width score.
36const float SCORE_INFTY = std::numeric_limits<float>::max();
37const float SCORE_OVERFULL = 1e12f;
38const float SCORE_DESPERATE = 1e10f;
39
40// When the text buffer is within this limit, capacity of vectors is retained at finish(),
41// to avoid allocation.
42const size_t MAX_TEXT_BUF_RETAIN = 32678;
43
44void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
45    delete mBreakIterator;
46    UErrorCode status = U_ZERO_ERROR;
47    mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
48    // TODO: check status
49
50    // TODO: load actual resource dependent on locale; letting Minikin do it is a hack
51    mHyphenator = hyphenator;
52}
53
54void LineBreaker::setText() {
55    UErrorCode status = U_ZERO_ERROR;
56    utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status);
57    mBreakIterator->setText(&mUText, status);
58    mBreakIterator->first();
59
60    // handle initial break here because addStyleRun may never be called
61    mBreakIterator->next();
62    mCandidates.clear();
63    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0};
64    mCandidates.push_back(cand);
65
66    // reset greedy breaker state
67    mBreaks.clear();
68    mWidths.clear();
69    mFlags.clear();
70    mLastBreak = 0;
71    mBestBreak = 0;
72    mBestScore = SCORE_INFTY;
73    mPreBreak = 0;
74    mFirstTabIndex = INT_MAX;
75}
76
77void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
78    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
79}
80
81// This function determines whether a character is a space that disappears at end of line.
82// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]]
83// Note: all such characters are in the BMP, so it's ok to use code units for this.
84static bool isLineEndSpace(uint16_t c) {
85    return c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || c == 0x205F ||
86            c == 0x3000;
87}
88
89// Ordinarily, this method measures the text in the range given. However, when paint
90// is nullptr, it assumes the widths have already been calculated and stored in the
91// width buffer.
92// This method finds the candidate word breaks (using the ICU break iterator) and sends them
93// to addCandidate.
94float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface,
95        FontStyle style, size_t start, size_t end, bool isRtl) {
96    Layout layout;  // performance TODO: move layout to self object to reduce allocation cost?
97    float width = 0.0f;
98    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
99
100    float hyphenPenalty = 0.0;
101    if (paint != nullptr) {
102        layout.setFontCollection(typeface);
103        layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style,
104                *paint);
105        layout.getAdvances(mCharWidths.data() + start);
106        width = layout.getAdvance();
107
108        // a heuristic that seems to perform well
109        hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
110    }
111
112    size_t current = (size_t)mBreakIterator->current();
113    size_t wordEnd = start;
114    size_t lastBreak = start;
115    ParaWidth lastBreakWidth = mWidth;
116    ParaWidth postBreak = mWidth;
117    for (size_t i = start; i < end; i++) {
118        uint16_t c = mTextBuf[i];
119        if (c == CHAR_TAB) {
120            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
121            if (mFirstTabIndex == INT_MAX) {
122                mFirstTabIndex = (int)i;
123            }
124            // fall back to greedy; other modes don't know how to deal with tabs
125            mStrategy = kBreakStrategy_Greedy;
126        } else {
127            mWidth += mCharWidths[i];
128            if (!isLineEndSpace(c)) {
129                postBreak = mWidth;
130                wordEnd = i + 1;
131            }
132        }
133        if (i + 1 == current) {
134            // Override ICU's treatment of soft hyphen as a break opportunity, because we want it
135            // to be a hyphen break, with penalty and drawing behavior.
136            if (c != CHAR_SOFT_HYPHEN) {
137                if (paint != nullptr && mHyphenator != nullptr && wordEnd > lastBreak) {
138                    mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak);
139    #if VERBOSE_DEBUG
140                    std::string hyphenatedString;
141                    for (size_t j = lastBreak; j < wordEnd; j++) {
142                        if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-');
143                        // Note: only works with ASCII, should do UTF-8 conversion here
144                        hyphenatedString.push_back(buffer()[j]);
145                    }
146                    ALOGD("hyphenated string: %s", hyphenatedString.c_str());
147    #endif
148
149                    // measure hyphenated substrings
150                    for (size_t j = lastBreak; j < wordEnd; j++) {
151                        uint8_t hyph = mHyphBuf[j - lastBreak];
152                        if (hyph) {
153                            paint->hyphenEdit = hyph;
154                            layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak,
155                                    mTextBuf.size(), bidiFlags, style, *paint);
156                            ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance();
157                            paint->hyphenEdit = 0;
158                            layout.doLayout(mTextBuf.data(), j, wordEnd - j,
159                                    mTextBuf.size(), bidiFlags, style, *paint);
160                            ParaWidth hyphPreBreak = postBreak - layout.getAdvance();
161                            addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph);
162                        }
163                    }
164                }
165
166                // Skip break for zero-width characters.
167                if (current == mTextBuf.size() || mCharWidths[current] > 0) {
168                    addWordBreak(current, mWidth, postBreak, 0.0, 0);
169                }
170                lastBreak = current;
171                lastBreakWidth = mWidth;
172            }
173            current = (size_t)mBreakIterator->next();
174        }
175    }
176
177    return width;
178}
179
180// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
181// needed (ie when word exceeds current line width)
182void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
183        float penalty, uint8_t hyph) {
184    Candidate cand;
185    ParaWidth width = mCandidates.back().preBreak;
186    if (postBreak - width > currentLineWidth()) {
187        // Add desperate breaks.
188        // Note: these breaks are based on the shaping of the (non-broken) original text; they
189        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
190        size_t i = mCandidates.back().offset;
191        width += mCharWidths[i++];
192        for (; i < offset; i++) {
193            float w = mCharWidths[i];
194            if (w > 0) {
195                cand.offset = i;
196                cand.preBreak = width;
197                cand.postBreak = width;
198                cand.penalty = SCORE_DESPERATE;
199                cand.hyphenEdit = 0;
200#if VERBOSE_DEBUG
201                ALOGD("desperate cand: %d %g:%g",
202                        mCandidates.size(), cand.postBreak, cand.preBreak);
203#endif
204                addCandidate(cand);
205                width += w;
206            }
207        }
208    }
209
210    cand.offset = offset;
211    cand.preBreak = preBreak;
212    cand.postBreak = postBreak;
213    cand.penalty = penalty;
214    cand.hyphenEdit = hyph;
215#if VERBOSE_DEBUG
216    ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
217#endif
218    addCandidate(cand);
219}
220
221// TODO: for justified text, refine with shrink/stretch
222float LineBreaker::computeScore(float delta, bool atEnd) {
223    if (delta < 0) {
224        return SCORE_OVERFULL;
225    } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
226        return 0.0;
227    } else {
228        return delta * delta;
229    }
230}
231
232// TODO performance: could avoid populating mCandidates if greedy only
233void LineBreaker::addCandidate(Candidate cand) {
234    size_t candIndex = mCandidates.size();
235    mCandidates.push_back(cand);
236    if (cand.postBreak - mPreBreak > currentLineWidth()) {
237        // This break would create an overfull line, pick the best break and break there (greedy)
238        if (mBestBreak == mLastBreak) {
239            mBestBreak = candIndex;
240        }
241        pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak,
242                mCandidates[mBestBreak].hyphenEdit);
243        mBestScore = SCORE_INFTY;
244#if VERBOSE_DEBUG
245        ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
246#endif
247        mLastBreak = mBestBreak;
248        mPreBreak = mCandidates[mBestBreak].preBreak;
249    }
250    if (cand.penalty <= mBestScore) {
251        mBestBreak = candIndex;
252        mBestScore = cand.penalty;
253    }
254}
255
256void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) {
257    mBreaks.push_back(offset);
258    mWidths.push_back(width);
259    int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
260    flags |= hyph;
261    mFlags.push_back(flags);
262    mFirstTabIndex = INT_MAX;
263}
264
265void LineBreaker::addReplacement(size_t start, size_t end, float width) {
266    mCharWidths[start] = width;
267    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
268    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
269}
270
271float LineBreaker::currentLineWidth() const {
272    return mLineWidths.getLineWidth(mBreaks.size());
273}
274
275void LineBreaker::computeBreaksGreedy() {
276    // All breaks but the last have been added in addCandidate already.
277    size_t nCand = mCandidates.size();
278    if (nCand == 1 || mLastBreak != nCand - 1) {
279        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0);
280        // don't need to update mBestScore, because we're done
281#if VERBOSE_DEBUG
282        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
283#endif
284    }
285}
286
287// Follow "prev" links in mCandidates array, and copy to result arrays.
288void LineBreaker::finishBreaksOptimal() {
289    // clear existing greedy break result
290    mBreaks.clear();
291    mWidths.clear();
292    mFlags.clear();
293    size_t nCand = mCandidates.size();
294    size_t prev;
295    for (size_t i = nCand - 1; i > 0; i = prev) {
296        prev = mCandidates[i].prev;
297        mBreaks.push_back(mCandidates[i].offset);
298        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
299        mFlags.push_back(mCandidates[i].hyphenEdit);
300    }
301    std::reverse(mBreaks.begin(), mBreaks.end());
302    std::reverse(mWidths.begin(), mWidths.end());
303    std::reverse(mFlags.begin(), mFlags.end());
304}
305
306void LineBreaker::computeBreaksOptimal() {
307    size_t active = 0;
308    size_t nCand = mCandidates.size();
309    for (size_t i = 1; i < nCand; i++) {
310        bool atEnd = i == nCand - 1;
311        float best = SCORE_INFTY;
312        size_t bestPrev = 0;
313
314        size_t lineNumberLast = mCandidates[active].lineNumber;
315        float width = mLineWidths.getLineWidth(lineNumberLast);
316        ParaWidth leftEdge = mCandidates[i].postBreak - width;
317        float bestHope = 0;
318
319        for (size_t j = active; j < i; j++) {
320            size_t lineNumber = mCandidates[j].lineNumber;
321            if (lineNumber != lineNumberLast) {
322                float widthNew = mLineWidths.getLineWidth(lineNumber);
323                if (widthNew != width) {
324                    leftEdge = mCandidates[i].postBreak - width;
325                    bestHope = 0;
326                    width = widthNew;
327                }
328                lineNumberLast = lineNumber;
329            }
330            float jScore = mCandidates[j].score;
331            if (jScore + bestHope >= best) continue;
332            float delta = mCandidates[j].preBreak - leftEdge;
333
334            float widthScore = computeScore(delta, atEnd);
335            if (delta < 0) {
336                active = j + 1;
337            } else {
338                bestHope = widthScore;
339            }
340
341            float score = jScore + widthScore;
342            if (score <= best) {
343                best = score;
344                bestPrev = j;
345            }
346        }
347        mCandidates[i].score = best + mCandidates[i].penalty;
348        mCandidates[i].prev = bestPrev;
349        mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
350    }
351    finishBreaksOptimal();
352}
353
354void LineBreaker::computeBreaksOptimalRect() {
355    size_t active = 0;
356    size_t nCand = mCandidates.size();
357    float width = mLineWidths.getLineWidth(0);
358    for (size_t i = 1; i < nCand; i++) {
359        bool atEnd = i == nCand - 1;
360        float best = SCORE_INFTY;
361        size_t bestPrev = 0;
362
363        // Width-based component of score increases as line gets shorter, so score will always be
364        // at least this.
365        float bestHope = 0;
366
367        ParaWidth leftEdge = mCandidates[i].postBreak - width;
368        for (size_t j = active; j < i; j++) {
369            // TODO performance: can break if bestHope >= best; worth it?
370            float jScore = mCandidates[j].score;
371            if (jScore + bestHope >= best) continue;
372            float delta = mCandidates[j].preBreak - leftEdge;
373
374            float widthScore = computeScore(delta, atEnd);
375            if (delta < 0) {
376                active = j + 1;
377            } else {
378                bestHope = widthScore;
379            }
380
381            float score = jScore + widthScore;
382            if (score <= best) {
383                best = score;
384                bestPrev = j;
385            }
386        }
387        mCandidates[i].score = best + mCandidates[i].penalty;
388        mCandidates[i].prev = bestPrev;
389    }
390    finishBreaksOptimal();
391}
392
393size_t LineBreaker::computeBreaks() {
394    if (mStrategy == kBreakStrategy_Greedy) {
395        computeBreaksGreedy();
396    } else if (mLineWidths.isConstant()) {
397        computeBreaksOptimalRect();
398    } else {
399        computeBreaksOptimal();
400    }
401    return mBreaks.size();
402}
403
404void LineBreaker::finish() {
405    mWidth = 0;
406    mCandidates.clear();
407    mBreaks.clear();
408    mWidths.clear();
409    mFlags.clear();
410    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
411        mTextBuf.clear();
412        mTextBuf.shrink_to_fit();
413        mCharWidths.clear();
414        mCharWidths.shrink_to_fit();
415        mHyphBuf.clear();
416        mHyphBuf.shrink_to_fit();
417        mCandidates.shrink_to_fit();
418        mBreaks.shrink_to_fit();
419        mWidths.shrink_to_fit();
420        mFlags.shrink_to_fit();
421    }
422    mStrategy = kBreakStrategy_Greedy;
423}
424
425}  // namespace android
426