LineBreaker.cpp revision 0b25d5ac85533f64764a0d53d5e5d33b46b715fa
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define VERBOSE_DEBUG 0
18
19#include <limits>
20
21#define LOG_TAG "Minikin"
22#include <cutils/log.h>
23
24#include <minikin/Layout.h>
25#include <minikin/LineBreaker.h>
26
27using std::vector;
28
29namespace android {
30
31const int CHAR_TAB = 0x0009;
32
33// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
34// constants are larger than any reasonable actual width score.
35const float SCORE_INFTY = std::numeric_limits<float>::max();
36const float SCORE_OVERFULL = 1e12f;
37const float SCORE_DESPERATE = 1e10f;
38
39// When the text buffer is within this limit, capacity of vectors is retained at finish(),
40// to avoid allocation.
41const size_t MAX_TEXT_BUF_RETAIN = 32678;
42
43void LineBreaker::setText() {
44    UErrorCode status = U_ZERO_ERROR;
45    utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status);
46    mBreakIterator->setText(&mUText, status);
47    mBreakIterator->first();
48
49    // handle initial break here because addStyleRun may never be called
50    mBreakIterator->next();
51    mCandidates.clear();
52    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0};
53    mCandidates.push_back(cand);
54
55    // reset greedy breaker state
56    mBreaks.clear();
57    mWidths.clear();
58    mFlags.clear();
59    mLastBreak = 0;
60    mBestBreak = 0;
61    mBestScore = SCORE_INFTY;
62    mPreBreak = 0;
63    mFirstTabIndex = INT_MAX;
64}
65
66void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
67    ALOGD("width %f", firstWidth);
68    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
69}
70
71// This function determines whether a character is a space that disappears at end of line.
72// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]]
73// Note: all such characters are in the BMP, so it's ok to use code units for this.
74static bool isLineEndSpace(uint16_t c) {
75    return c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || c == 0x205F ||
76            c == 0x3000;
77}
78
79// Ordinarily, this method measures the text in the range given. However, when paint
80// is nullptr, it assumes the widths have already been calculated and stored in the
81// width buffer.
82// This method finds the candidate word breaks (using the ICU break iterator) and sends them
83// to addCandidate.
84float LineBreaker::addStyleRun(const MinikinPaint* paint, const FontCollection* typeface,
85        FontStyle style, size_t start, size_t end, bool isRtl) {
86    Layout layout;  // performance TODO: move layout to self object to reduce allocation cost?
87    float width = 0.0f;
88    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
89
90    if (paint != nullptr) {
91        layout.setFontCollection(typeface);
92        layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style,
93                *paint);
94        layout.getAdvances(mCharWidths.data() + start);
95        width = layout.getAdvance();
96    }
97
98    ParaWidth postBreak = mWidth;
99    size_t current = (size_t)mBreakIterator->current();
100    for (size_t i = start; i < end; i++) {
101        uint16_t c = mTextBuf[i];
102        if (c == CHAR_TAB) {
103            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
104            if (mFirstTabIndex == INT_MAX) {
105                mFirstTabIndex = (int)i;
106            }
107            // fall back to greedy; other modes don't know how to deal with tabs
108            mStrategy = kBreakStrategy_Greedy;
109        } else {
110            mWidth += mCharWidths[i];
111            if (!isLineEndSpace(c)) {
112                postBreak = mWidth;
113            }
114        }
115        if (i + 1 == current) {
116            // TODO: hyphenation goes here
117
118            // Skip break for zero-width characters.
119            if (current == mTextBuf.size() || mCharWidths[current] > 0) {
120                addWordBreak(current, mWidth, postBreak, 0);
121            }
122            current = (size_t)mBreakIterator->next();
123        }
124    }
125
126    return width;
127}
128
129// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
130// needed (ie when word exceeds current line width)
131void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
132        float penalty) {
133    Candidate cand;
134    ParaWidth width = mCandidates.back().preBreak;
135    if (postBreak - width > currentLineWidth()) {
136        // Add desperate breaks.
137        // Note: these breaks are based on the shaping of the (non-broken) original text; they
138        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
139        size_t i = mCandidates.back().offset;
140        width += mCharWidths[i++];
141        for (; i < offset; i++) {
142            float w = mCharWidths[i];
143            if (w > 0) {
144                cand.offset = i;
145                cand.preBreak = width;
146                cand.postBreak = width;
147                cand.penalty = SCORE_DESPERATE;
148#if VERBOSE_DEBUG
149                ALOGD("desperate cand: %d %g:%g",
150                        mCandidates.size(), cand.postBreak, cand.preBreak);
151#endif
152                addCandidate(cand);
153                width += w;
154            }
155        }
156    }
157
158    cand.offset = offset;
159    cand.preBreak = preBreak;
160    cand.postBreak = postBreak;
161    cand.penalty = penalty;
162#if VERBOSE_DEBUG
163    ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
164#endif
165    addCandidate(cand);
166}
167
168// TODO performance: could avoid populating mCandidates if greedy only
169void LineBreaker::addCandidate(Candidate cand) {
170    size_t candIndex = mCandidates.size();
171    mCandidates.push_back(cand);
172    if (cand.postBreak - mPreBreak > currentLineWidth()) {
173        // This break would create an overfull line, pick the best break and break there (greedy)
174        if (mBestBreak == mLastBreak) {
175            mBestBreak = candIndex;
176        }
177        mBreaks.push_back(mCandidates[mBestBreak].offset);
178        mWidths.push_back(mCandidates[mBestBreak].postBreak - mPreBreak);
179        mFlags.push_back(mFirstTabIndex < mBreaks.back());
180        mFirstTabIndex = INT_MAX;
181        mBestScore = SCORE_INFTY;
182#if VERBOSE_DEBUG
183        ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
184#endif
185        mLastBreak = mBestBreak;
186        mPreBreak = mCandidates[mBestBreak].preBreak;
187    }
188    if (cand.penalty <= mBestScore) {
189        mBestBreak = candIndex;
190        mBestScore = cand.penalty;
191    }
192}
193
194void LineBreaker::addReplacement(size_t start, size_t end, float width) {
195    mCharWidths[start] = width;
196    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
197    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
198}
199
200float LineBreaker::currentLineWidth() const {
201    return mLineWidths.getLineWidth(mBreaks.size());
202}
203
204void LineBreaker::computeBreaksGreedy() {
205    // All breaks but the last have been added in addCandidate already.
206    size_t nCand = mCandidates.size();
207    if (nCand == 1 || mLastBreak != nCand - 1) {
208        mBreaks.push_back(mCandidates[nCand - 1].offset);
209        mWidths.push_back(mCandidates[nCand - 1].postBreak - mPreBreak);
210        mFlags.push_back(mFirstTabIndex < mBreaks.back());
211        // don't need to update mFirstTabIndex or mBestScore, because we're done
212#if VERBOSE_DEBUG
213        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
214#endif
215    }
216}
217
218void LineBreaker::computeBreaksOpt() {
219    // clear existing greedy break result
220    mBreaks.clear();
221    mWidths.clear();
222    mFlags.clear();
223    size_t active = 0;
224    size_t nCand = mCandidates.size();
225    float width = mLineWidths.getLineWidth(0);
226    // TODO: actually support non-constant width
227    for (size_t i = 1; i < nCand; i++) {
228        bool stretchIsFree = mStrategy != kBreakStrategy_Balanced && i == nCand - 1;
229        float best = SCORE_INFTY;
230        size_t bestPrev = 0;
231
232        // Width-based component of score increases as line gets shorter, so score will always be
233        // at least this.
234        float bestHope = 0;
235
236        ParaWidth leftEdge = mCandidates[i].postBreak - width;
237        for (size_t j = active; j < i; j++) {
238            float jScore = mCandidates[j].score;
239            if (jScore + bestHope >= best) continue;
240            float delta = mCandidates[j].preBreak - leftEdge;
241
242            // TODO: for justified text, refine with shrink/stretch
243            float widthScore;
244            if (delta < 0) {
245                widthScore = SCORE_OVERFULL;
246                active = j + 1;
247            } else {
248                widthScore = stretchIsFree ? 0 : delta * delta;
249                bestHope = widthScore;
250            }
251
252            float score = jScore + widthScore;
253            if (score <= best) {
254                best = score;
255                bestPrev = j;
256            }
257        }
258        mCandidates[i].score = best + mCandidates[i].penalty;
259        mCandidates[i].prev = bestPrev;
260    }
261    size_t prev;
262    for (size_t i = nCand - 1; i > 0; i = prev) {
263        prev = mCandidates[i].prev;
264        mBreaks.push_back(mCandidates[i].offset);
265        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
266        mFlags.push_back(0);
267    }
268    std::reverse(mBreaks.begin(), mBreaks.end());
269    std::reverse(mWidths.begin(), mWidths.end());
270    std::reverse(mFlags.begin(), mFlags.end());
271}
272
273size_t LineBreaker::computeBreaks() {
274    if (mStrategy == kBreakStrategy_Greedy) {
275        computeBreaksGreedy();
276    } else {
277        computeBreaksOpt();
278    }
279    return mBreaks.size();
280}
281
282void LineBreaker::finish() {
283    mWidth = 0;
284    mCandidates.clear();
285    mBreaks.clear();
286    mWidths.clear();
287    mFlags.clear();
288    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
289        mTextBuf.clear();
290        mTextBuf.shrink_to_fit();
291        mCharWidths.clear();
292        mCharWidths.shrink_to_fit();
293        mCandidates.shrink_to_fit();
294        mBreaks.shrink_to_fit();
295        mWidths.shrink_to_fit();
296        mFlags.shrink_to_fit();
297    }
298    mStrategy = kBreakStrategy_Greedy;
299}
300
301}  // namespace android
302