LineBreaker.cpp revision 0dc07c0be325b7c12c50729e04c4b2785a673fd7
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define VERBOSE_DEBUG 0
18
19#include <limits>
20
21#define LOG_TAG "Minikin"
22#include <cutils/log.h>
23
24#include <minikin/Layout.h>
25#include <minikin/LineBreaker.h>
26
27using std::vector;
28
29namespace android {
30
31const int CHAR_TAB = 0x0009;
32const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
33
34// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
35// constants are larger than any reasonable actual width score.
36const float SCORE_INFTY = std::numeric_limits<float>::max();
37const float SCORE_OVERFULL = 1e12f;
38const float SCORE_DESPERATE = 1e10f;
39
40// When the text buffer is within this limit, capacity of vectors is retained at finish(),
41// to avoid allocation.
42const size_t MAX_TEXT_BUF_RETAIN = 32678;
43
44void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
45    delete mBreakIterator;
46    UErrorCode status = U_ZERO_ERROR;
47    mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
48    // TODO: check status
49
50    // TODO: load actual resource dependent on locale; letting Minikin do it is a hack
51    mHyphenator = hyphenator;
52}
53
54void LineBreaker::setText() {
55    UErrorCode status = U_ZERO_ERROR;
56    utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status);
57    mBreakIterator->setText(&mUText, status);
58    mBreakIterator->first();
59
60    // handle initial break here because addStyleRun may never be called
61    mBreakIterator->next();
62    mCandidates.clear();
63    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0};
64    mCandidates.push_back(cand);
65
66    // reset greedy breaker state
67    mBreaks.clear();
68    mWidths.clear();
69    mFlags.clear();
70    mLastBreak = 0;
71    mBestBreak = 0;
72    mBestScore = SCORE_INFTY;
73    mPreBreak = 0;
74    mFirstTabIndex = INT_MAX;
75}
76
77void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
78    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
79}
80
81
82void LineBreaker::setIndents(const std::vector<float>& indents) {
83    mLineWidths.setIndents(indents);
84}
85
86// This function determines whether a character is a space that disappears at end of line.
87// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
88// plus '\n'.
89// Note: all such characters are in the BMP, so it's ok to use code units for this.
90static bool isLineEndSpace(uint16_t c) {
91    return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
92            c == 0x205F || c == 0x3000;
93}
94
95// Ordinarily, this method measures the text in the range given. However, when paint
96// is nullptr, it assumes the widths have already been calculated and stored in the
97// width buffer.
98// This method finds the candidate word breaks (using the ICU break iterator) and sends them
99// to addCandidate.
100float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface,
101        FontStyle style, size_t start, size_t end, bool isRtl) {
102    Layout layout;  // performance TODO: move layout to self object to reduce allocation cost?
103    float width = 0.0f;
104    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
105
106    float hyphenPenalty = 0.0;
107    if (paint != nullptr) {
108        layout.setFontCollection(typeface);
109        layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style,
110                *paint);
111        layout.getAdvances(mCharWidths.data() + start);
112        width = layout.getAdvance();
113
114        // a heuristic that seems to perform well
115        hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
116        if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
117            hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
118        }
119    }
120
121    size_t current = (size_t)mBreakIterator->current();
122    size_t wordEnd = start;
123    size_t lastBreak = start;
124    ParaWidth lastBreakWidth = mWidth;
125    ParaWidth postBreak = mWidth;
126    for (size_t i = start; i < end; i++) {
127        uint16_t c = mTextBuf[i];
128        if (c == CHAR_TAB) {
129            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
130            if (mFirstTabIndex == INT_MAX) {
131                mFirstTabIndex = (int)i;
132            }
133            // fall back to greedy; other modes don't know how to deal with tabs
134            mStrategy = kBreakStrategy_Greedy;
135        } else {
136            mWidth += mCharWidths[i];
137            if (!isLineEndSpace(c)) {
138                postBreak = mWidth;
139                wordEnd = i + 1;
140            }
141        }
142        if (i + 1 == current) {
143            // Override ICU's treatment of soft hyphen as a break opportunity, because we want it
144            // to be a hyphen break, with penalty and drawing behavior.
145            if (c != CHAR_SOFT_HYPHEN) {
146                if (paint != nullptr && mHyphenator != nullptr &&
147                        mHyphenationFrequency != kHyphenationFrequency_None &&
148                        wordEnd > lastBreak) {
149                    mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak);
150    #if VERBOSE_DEBUG
151                    std::string hyphenatedString;
152                    for (size_t j = lastBreak; j < wordEnd; j++) {
153                        if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-');
154                        // Note: only works with ASCII, should do UTF-8 conversion here
155                        hyphenatedString.push_back(buffer()[j]);
156                    }
157                    ALOGD("hyphenated string: %s", hyphenatedString.c_str());
158    #endif
159
160                    // measure hyphenated substrings
161                    for (size_t j = lastBreak; j < wordEnd; j++) {
162                        uint8_t hyph = mHyphBuf[j - lastBreak];
163                        if (hyph) {
164                            paint->hyphenEdit = hyph;
165                            layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak,
166                                    mTextBuf.size(), bidiFlags, style, *paint);
167                            ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance();
168                            paint->hyphenEdit = 0;
169                            layout.doLayout(mTextBuf.data(), j, wordEnd - j,
170                                    mTextBuf.size(), bidiFlags, style, *paint);
171                            ParaWidth hyphPreBreak = postBreak - layout.getAdvance();
172                            addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph);
173                        }
174                    }
175                }
176
177                // Skip break for zero-width characters.
178                if (current == mTextBuf.size() || mCharWidths[current] > 0) {
179                    addWordBreak(current, mWidth, postBreak, 0.0, 0);
180                }
181                lastBreak = current;
182                lastBreakWidth = mWidth;
183            }
184            current = (size_t)mBreakIterator->next();
185        }
186    }
187
188    return width;
189}
190
191// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
192// needed (ie when word exceeds current line width)
193void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
194        float penalty, uint8_t hyph) {
195    Candidate cand;
196    ParaWidth width = mCandidates.back().preBreak;
197    if (postBreak - width > currentLineWidth()) {
198        // Add desperate breaks.
199        // Note: these breaks are based on the shaping of the (non-broken) original text; they
200        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
201        size_t i = mCandidates.back().offset;
202        width += mCharWidths[i++];
203        for (; i < offset; i++) {
204            float w = mCharWidths[i];
205            if (w > 0) {
206                cand.offset = i;
207                cand.preBreak = width;
208                cand.postBreak = width;
209                cand.penalty = SCORE_DESPERATE;
210                cand.hyphenEdit = 0;
211#if VERBOSE_DEBUG
212                ALOGD("desperate cand: %d %g:%g",
213                        mCandidates.size(), cand.postBreak, cand.preBreak);
214#endif
215                addCandidate(cand);
216                width += w;
217            }
218        }
219    }
220
221    cand.offset = offset;
222    cand.preBreak = preBreak;
223    cand.postBreak = postBreak;
224    cand.penalty = penalty;
225    cand.hyphenEdit = hyph;
226#if VERBOSE_DEBUG
227    ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
228#endif
229    addCandidate(cand);
230}
231
232// TODO: for justified text, refine with shrink/stretch
233float LineBreaker::computeScore(float delta, bool atEnd) {
234    if (delta < 0) {
235        return SCORE_OVERFULL;
236    } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
237        return 0.0;
238    } else {
239        return delta * delta;
240    }
241}
242
243// TODO performance: could avoid populating mCandidates if greedy only
244void LineBreaker::addCandidate(Candidate cand) {
245    size_t candIndex = mCandidates.size();
246    mCandidates.push_back(cand);
247    if (cand.postBreak - mPreBreak > currentLineWidth()) {
248        // This break would create an overfull line, pick the best break and break there (greedy)
249        if (mBestBreak == mLastBreak) {
250            mBestBreak = candIndex;
251        }
252        pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak,
253                mCandidates[mBestBreak].hyphenEdit);
254        mBestScore = SCORE_INFTY;
255#if VERBOSE_DEBUG
256        ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
257#endif
258        mLastBreak = mBestBreak;
259        mPreBreak = mCandidates[mBestBreak].preBreak;
260    }
261    if (cand.penalty <= mBestScore) {
262        mBestBreak = candIndex;
263        mBestScore = cand.penalty;
264    }
265}
266
267void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) {
268    mBreaks.push_back(offset);
269    mWidths.push_back(width);
270    int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
271    flags |= hyph;
272    mFlags.push_back(flags);
273    mFirstTabIndex = INT_MAX;
274}
275
276void LineBreaker::addReplacement(size_t start, size_t end, float width) {
277    mCharWidths[start] = width;
278    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
279    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
280}
281
282float LineBreaker::currentLineWidth() const {
283    return mLineWidths.getLineWidth(mBreaks.size());
284}
285
286void LineBreaker::computeBreaksGreedy() {
287    // All breaks but the last have been added in addCandidate already.
288    size_t nCand = mCandidates.size();
289    if (nCand == 1 || mLastBreak != nCand - 1) {
290        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0);
291        // don't need to update mBestScore, because we're done
292#if VERBOSE_DEBUG
293        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
294#endif
295    }
296}
297
298// Follow "prev" links in mCandidates array, and copy to result arrays.
299void LineBreaker::finishBreaksOptimal() {
300    // clear existing greedy break result
301    mBreaks.clear();
302    mWidths.clear();
303    mFlags.clear();
304    size_t nCand = mCandidates.size();
305    size_t prev;
306    for (size_t i = nCand - 1; i > 0; i = prev) {
307        prev = mCandidates[i].prev;
308        mBreaks.push_back(mCandidates[i].offset);
309        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
310        mFlags.push_back(mCandidates[i].hyphenEdit);
311    }
312    std::reverse(mBreaks.begin(), mBreaks.end());
313    std::reverse(mWidths.begin(), mWidths.end());
314    std::reverse(mFlags.begin(), mFlags.end());
315}
316
317void LineBreaker::computeBreaksOptimal() {
318    size_t active = 0;
319    size_t nCand = mCandidates.size();
320    for (size_t i = 1; i < nCand; i++) {
321        bool atEnd = i == nCand - 1;
322        float best = SCORE_INFTY;
323        size_t bestPrev = 0;
324
325        size_t lineNumberLast = mCandidates[active].lineNumber;
326        float width = mLineWidths.getLineWidth(lineNumberLast);
327        ParaWidth leftEdge = mCandidates[i].postBreak - width;
328        float bestHope = 0;
329
330        for (size_t j = active; j < i; j++) {
331            size_t lineNumber = mCandidates[j].lineNumber;
332            if (lineNumber != lineNumberLast) {
333                float widthNew = mLineWidths.getLineWidth(lineNumber);
334                if (widthNew != width) {
335                    leftEdge = mCandidates[i].postBreak - width;
336                    bestHope = 0;
337                    width = widthNew;
338                }
339                lineNumberLast = lineNumber;
340            }
341            float jScore = mCandidates[j].score;
342            if (jScore + bestHope >= best) continue;
343            float delta = mCandidates[j].preBreak - leftEdge;
344
345            float widthScore = computeScore(delta, atEnd);
346            if (delta < 0) {
347                active = j + 1;
348            } else {
349                bestHope = widthScore;
350            }
351
352            float score = jScore + widthScore;
353            if (score <= best) {
354                best = score;
355                bestPrev = j;
356            }
357        }
358        mCandidates[i].score = best + mCandidates[i].penalty;
359        mCandidates[i].prev = bestPrev;
360        mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
361    }
362    finishBreaksOptimal();
363}
364
365void LineBreaker::computeBreaksOptimalRect() {
366    size_t active = 0;
367    size_t nCand = mCandidates.size();
368    float width = mLineWidths.getLineWidth(0);
369    for (size_t i = 1; i < nCand; i++) {
370        bool atEnd = i == nCand - 1;
371        float best = SCORE_INFTY;
372        size_t bestPrev = 0;
373
374        // Width-based component of score increases as line gets shorter, so score will always be
375        // at least this.
376        float bestHope = 0;
377
378        ParaWidth leftEdge = mCandidates[i].postBreak - width;
379        for (size_t j = active; j < i; j++) {
380            // TODO performance: can break if bestHope >= best; worth it?
381            float jScore = mCandidates[j].score;
382            if (jScore + bestHope >= best) continue;
383            float delta = mCandidates[j].preBreak - leftEdge;
384
385            float widthScore = computeScore(delta, atEnd);
386            if (delta < 0) {
387                active = j + 1;
388            } else {
389                bestHope = widthScore;
390            }
391
392            float score = jScore + widthScore;
393            if (score <= best) {
394                best = score;
395                bestPrev = j;
396            }
397        }
398        mCandidates[i].score = best + mCandidates[i].penalty;
399        mCandidates[i].prev = bestPrev;
400    }
401    finishBreaksOptimal();
402}
403
404size_t LineBreaker::computeBreaks() {
405    if (mStrategy == kBreakStrategy_Greedy) {
406        computeBreaksGreedy();
407    } else if (mLineWidths.isConstant()) {
408        computeBreaksOptimalRect();
409    } else {
410        computeBreaksOptimal();
411    }
412    return mBreaks.size();
413}
414
415void LineBreaker::finish() {
416    mWidth = 0;
417    mCandidates.clear();
418    mBreaks.clear();
419    mWidths.clear();
420    mFlags.clear();
421    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
422        mTextBuf.clear();
423        mTextBuf.shrink_to_fit();
424        mCharWidths.clear();
425        mCharWidths.shrink_to_fit();
426        mHyphBuf.clear();
427        mHyphBuf.shrink_to_fit();
428        mCandidates.shrink_to_fit();
429        mBreaks.shrink_to_fit();
430        mWidths.shrink_to_fit();
431        mFlags.shrink_to_fit();
432    }
433    mStrategy = kBreakStrategy_Greedy;
434    mHyphenationFrequency = kHyphenationFrequency_Normal;
435}
436
437}  // namespace android
438