LineBreaker.cpp revision 0948fbb63636111c193365e01dbe952defd700f3
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "LineBreakerImpl.h"
18
19#include <algorithm>
20#include <limits>
21
22#include "minikin/Characters.h"
23#include "minikin/Layout.h"
24#include "minikin/Range.h"
25#include "minikin/U16StringPiece.h"
26
27#include "HyphenatorMap.h"
28#include "LayoutUtils.h"
29#include "Locale.h"
30#include "LocaleListCache.h"
31#include "MinikinInternal.h"
32#include "WordBreaker.h"
33
34namespace minikin {
35
36// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
37// constants are larger than any reasonable actual width score.
38const float SCORE_INFTY = std::numeric_limits<float>::max();
39const float SCORE_OVERFULL = 1e12f;
40const float SCORE_DESPERATE = 1e10f;
41
42// Multiplier for hyphen penalty on last line.
43const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
44// Penalty assigned to each line break (to try to minimize number of lines)
45// TODO: when we implement full justification (so spaces can shrink and stretch), this is
46// probably not the most appropriate method.
47const float LINE_PENALTY_MULTIPLIER = 2.0f;
48
49// Penalty assigned to shrinking the whitepsace.
50const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
51
52// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
53// unreasonably long words. This is somewhat of a heuristic because extremely long words
54// are possible in some languages. This does mean that very long real words can get
55// broken by desperate breaks, with no hyphens.
56const size_t LONGEST_HYPHENATED_WORD = 45;
57
58// Maximum amount that spaces can shrink, in justified text.
59const float SHRINKABILITY = 1.0 / 3.0;
60
61constexpr size_t LAST_BREAK_OFFSET_NOWHERE = SIZE_MAX;
62constexpr size_t LAST_BREAK_OFFSET_DESPERATE = LAST_BREAK_OFFSET_NOWHERE - 1;
63
64inline static const LocaleList& getLocaleList(uint32_t localeListId) {
65    android::AutoMutex _l(gMinikinLock);
66    return LocaleListCache::getById(localeListId);
67}
68
69LineBreakerImpl::LineBreakerImpl(const U16StringPiece& textBuffer, BreakStrategy strategy,
70                                 HyphenationFrequency frequency, bool justified)
71        : LineBreakerImpl(std::make_unique<WordBreaker>(), textBuffer, strategy, frequency,
72                          justified) {}
73
74LineBreakerImpl::LineBreakerImpl(std::unique_ptr<WordBreaker>&& breaker,
75                                 const U16StringPiece& textBuffer, BreakStrategy strategy,
76                                 HyphenationFrequency frequency, bool justified)
77        : mCurrentLocaleListId(LocaleListCache::kInvalidListId),
78          mWordBreaker(std::move(breaker)),
79          mTextBuf(textBuffer),
80          mStrategy(strategy),
81          mHyphenationFrequency(frequency),
82          mJustified(justified),
83          mLastConsideredGreedyCandidate(LAST_BREAK_OFFSET_NOWHERE),
84          mSpaceCount(0) {
85    mWordBreaker->setText(mTextBuf.data(), mTextBuf.size());
86
87    // handle initial break here because addStyleRun may never be called
88    mCandidates.push_back({0 /* offset */, 0.0 /* preBreak */, 0.0 /* postBreak */,
89                           0.0 /* firstOverhang */, 0.0 /* secondOverhang */, 0.0 /* penalty */,
90                           0 /* preSpaceCount */, 0 /* postSpaceCount */,
91                           HyphenationType::DONT_BREAK /* hyphenType */,
92                           false /* isRtl. TODO: may need to be based on input. */});
93}
94
95LineBreakerImpl::~LineBreakerImpl() {}
96
97const LineBreakerImpl::Candidate& LineBreakerImpl::getLastBreakCandidate() const {
98    MINIKIN_ASSERT(mLastGreedyBreakIndex != LAST_BREAK_OFFSET_NOWHERE,
99                   "Line break hasn't started.");
100    return mLastGreedyBreakIndex == LAST_BREAK_OFFSET_DESPERATE
101                   ? mFakeDesperateCandidate
102                   : mCandidates[mLastGreedyBreakIndex];
103}
104
105void LineBreakerImpl::setLocaleList(uint32_t localeListId, size_t restartFrom) {
106    if (mCurrentLocaleListId == localeListId) {
107        return;
108    }
109
110    const LocaleList& newLocales = getLocaleList(localeListId);
111    const Locale newLocale = newLocales.empty() ? Locale() : newLocales[0];
112    const uint64_t newLocaleId = newLocale.getIdentifier();
113
114    const bool needUpdate =
115            // The first time setLocale is called.
116            mCurrentLocaleListId == LocaleListCache::kInvalidListId ||
117            // The effective locale is changed.
118            newLocaleId != mCurrentLocaleId;
119
120    // For now, we ignore all locales except the first valid one.
121    // TODO: Support selecting the locale based on the script of the text.
122    mCurrentLocaleListId = localeListId;
123    mCurrentLocaleId = newLocaleId;
124    if (needUpdate) {
125        mWordBreaker->followingWithLocale(newLocale, restartFrom);
126        mHyphenator = HyphenatorMap::lookup(newLocale);
127    }
128}
129
130// This function determines whether a character is a space that disappears at end of line.
131// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
132// plus '\n'.
133// Note: all such characters are in the BMP, so it's ok to use code units for this.
134static bool isLineEndSpace(uint16_t c) {
135    return c == '\n' || c == ' '                           // SPACE
136           || c == 0x1680                                  // OGHAM SPACE MARK
137           || (0x2000 <= c && c <= 0x200A && c != 0x2007)  // EN QUAD, EM QUAD, EN SPACE, EM SPACE,
138                                                           // THREE-PER-EM SPACE, FOUR-PER-EM SPACE,
139                                                           // SIX-PER-EM SPACE, PUNCTUATION SPACE,
140                                                           // THIN SPACE, HAIR SPACE
141           || c == 0x205F                                  // MEDIUM MATHEMATICAL SPACE
142           || c == 0x3000;                                 // IDEOGRAPHIC SPACE
143}
144
145// Hyphenates a string potentially containing non-breaking spaces.
146std::vector<HyphenationType> LineBreakerImpl::hyphenate(const U16StringPiece& str) {
147    std::vector<HyphenationType> out;
148    const size_t len = str.size();
149    out.reserve(len);
150
151    // A word here is any consecutive string of non-NBSP characters.
152    bool inWord = false;
153    size_t wordStart = 0;  // The initial value will never be accessed, but just in case.
154    for (size_t i = 0; i <= len; i++) {
155        if (i == len || str[i] == CHAR_NBSP) {
156            if (inWord) {
157                // A word just ended. Hyphenate it.
158                const size_t wordLen = i - wordStart;
159                if (wordLen <= LONGEST_HYPHENATED_WORD) {
160                    if (wordStart == 0) {
161                        // The string starts with a word. Use out directly.
162                        mHyphenator->hyphenate(&out, str.data(), wordLen);
163                    } else {
164                        std::vector<HyphenationType> wordVec;
165                        mHyphenator->hyphenate(&wordVec, str.data() + wordStart, wordLen);
166                        out.insert(out.end(), wordVec.cbegin(), wordVec.cend());
167                    }
168                } else {  // Word is too long. Inefficient to hyphenate.
169                    out.insert(out.end(), wordLen, HyphenationType::DONT_BREAK);
170                }
171                inWord = false;
172            }
173            if (i < len) {
174                // Insert one DONT_BREAK for the NBSP.
175                out.push_back(HyphenationType::DONT_BREAK);
176            }
177        } else if (!inWord) {
178            inWord = true;
179            wordStart = i;
180        }
181    }
182    return out;
183}
184
185// Compute the total overhang of text based on per-cluster advances and overhangs.
186// The two input vectors are expected to be of the same size.
187/* static */ LayoutOverhang LineBreakerImpl::computeOverhang(
188        float totalAdvance, const std::vector<float>& advances,
189        const std::vector<LayoutOverhang>& overhangs, bool isRtl) {
190    ParaWidth left = 0.0;
191    ParaWidth right = 0.0;
192    ParaWidth seenAdvance = 0.0;
193    const size_t len = advances.size();
194    if (isRtl) {
195        for (size_t i = 0; i < len; i++) {
196            right = std::max(right, overhangs[i].right - seenAdvance);
197            seenAdvance += advances[i];
198            left = std::max(left, overhangs[i].left - (totalAdvance - seenAdvance));
199        }
200    } else {
201        for (size_t i = 0; i < len; i++) {
202            left = std::max(left, overhangs[i].left - seenAdvance);
203            seenAdvance += advances[i];
204            right = std::max(right, overhangs[i].right - (totalAdvance - seenAdvance));
205        }
206    }
207    return {static_cast<float>(left), static_cast<float>(right)};
208}
209
210// This adds all the hyphenation candidates for a given word by first finding all the hyphenation
211// points and then calling addWordBreak for each.
212//
213// paint will be used for measuring the text and paint must not be null.
214// wordRange is the range for the word.
215// contextRange is the range from the last word breakpoint to the first code unit after the word.
216// For example, if the word starts with the punctuations or ends with spaces, the contextRange
217// contains both punctuations and trailing spaces but wordRange excludes them.
218// lastBreakWidth is the width seen until the begining of context range.
219// bidiFlags keep the bidi flags to determine the direction of text for layout and other
220//     calculations. It may only be Bidi::FORCE_RTL or Bidi::FORCE_LTR.
221//
222// The following parameters are needed to be passed to addWordBreak:
223// postBreak is the width that would be seen if we decide to break at the end of the word (so it
224//     doesn't count any line-ending space after the word).
225// postSpaceCount is the number of spaces that would be seen if we decide to break at the end of the
226//     word (and like postBreak it doesn't count any line-ending space after the word).
227// hyphenPenalty is the amount of penalty for hyphenation.
228void LineBreakerImpl::addHyphenationCandidates(const Run& run, const Range& contextRange,
229                                               const Range& wordRange, ParaWidth lastBreakWidth,
230                                               ParaWidth postBreak, size_t postSpaceCount,
231                                               float hyphenPenalty) {
232    MINIKIN_ASSERT(contextRange.contains(wordRange), "Context must contain word range");
233
234    const bool isRtlWord = run.isRtl();
235    const std::vector<HyphenationType> hyphenResult = hyphenate(mTextBuf.substr(wordRange));
236
237    std::vector<float> advances;
238    std::vector<LayoutOverhang> overhangs;
239    advances.reserve(contextRange.getLength());
240    overhangs.reserve(contextRange.getLength());
241    // measure hyphenated substrings
242    for (size_t j : wordRange) {
243        HyphenationType hyph = hyphenResult[wordRange.toRangeOffset(j)];
244        if (hyph == HyphenationType::DONT_BREAK) {
245            continue;
246        }
247
248        auto hyphenPart = contextRange.split(j);
249        const Range& firstPart = hyphenPart.first;
250        const Range& secondPart = hyphenPart.second;
251
252        const size_t firstPartLen = firstPart.getLength();
253        advances.resize(firstPartLen);
254        overhangs.resize(firstPartLen);
255        const float firstPartWidth =
256                run.measureHyphenPiece(mTextBuf, firstPart, StartHyphenEdit::NO_EDIT,
257                                       editForThisLine(hyph), advances.data(), overhangs.data());
258        const ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
259        LayoutOverhang overhang = computeOverhang(firstPartWidth, advances, overhangs, isRtlWord);
260        // TODO: This ignores potential overhang from a previous word, e.g. in "R table" if the
261        // right overhang of the R is larger than the advance of " ta-". In such cases, we need to
262        // take the existing overhang into account.
263        const float firstOverhang = isRtlWord ? overhang.left : overhang.right;
264
265        const size_t secondPartLen = secondPart.getLength();
266        advances.resize(secondPartLen);
267        overhangs.resize(secondPartLen);
268        const float secondPartWidth =
269                run.measureHyphenPiece(mTextBuf, secondPart, editForNextLine(hyph),
270                                       EndHyphenEdit::NO_EDIT, advances.data(), overhangs.data());
271        // hyphPreBreak is calculated like this so that when the line width for a future line break
272        // is being calculated, the width of the whole word would be subtracted and the width of the
273        // second part would be added.
274        const ParaWidth hyphPreBreak = postBreak - secondPartWidth;
275        overhang = computeOverhang(secondPartWidth, advances, overhangs, isRtlWord);
276        const float secondOverhang = isRtlWord ? overhang.right : overhang.left;
277
278        addWordBreak(j, hyphPreBreak, hyphPostBreak, firstOverhang, secondOverhang, postSpaceCount,
279                     postSpaceCount, hyphenPenalty, hyph, isRtlWord);
280    }
281}
282
283// This method finds the candidate word breaks (using the ICU break iterator) and sends them
284// to addWordBreak.
285void LineBreakerImpl::addRuns(const MeasuredText& measured, const LineWidth& lineWidth,
286                              const TabStops& tabStops) {
287    for (const auto& run : measured.runs) {
288        const bool isRtl = run->isRtl();
289        const Range& range = run->getRange();
290
291        const bool canHyphenate = run->canHyphenate();
292        float hyphenPenalty = 0.0;
293        if (canHyphenate) {
294            const MinikinPaint* paint = run->getPaint();
295            // a heuristic that seems to perform well
296            hyphenPenalty = 0.5 * paint->size * paint->scaleX * lineWidth.getAt(0);
297            if (mHyphenationFrequency == HyphenationFrequency::Normal) {
298                hyphenPenalty *= 4.0;  // TODO: Replace with a better value after some testing
299            }
300
301            if (mJustified) {
302                // Make hyphenation more aggressive for fully justified text (so that "normal" in
303                // justified mode is the same as "full" in ragged-right).
304                hyphenPenalty *= 0.25;
305            } else {
306                // Line penalty is zero for justified text.
307                mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
308            }
309        }
310
311        setLocaleList(run->getLocaleListId(), range.getStart());
312        size_t current = (size_t)mWordBreaker->current();
313        // This will keep the index of last code unit seen that's not a line-ending space, plus one.
314        // In other words, the index of the first code unit after a word.
315
316        Range hyphenationContextRange(range.getStart(), range.getStart());
317        ParaWidth lastBreakWidth = mWidth;  // The width of the text as of the previous break point.
318        ParaWidth postBreak = mWidth;       // The width of text seen if we decide to break here
319        // postBreak plus potential forward overhang. Guaranteed to be >= postBreak.
320        ParaWidth postBreakWithOverhang = mWidth;
321        // The maximum amount of backward overhang seen since last word.
322        float maxBackwardOverhang = 0;
323        size_t postSpaceCount = mSpaceCount;
324        const bool hyphenate = canHyphenate && mHyphenationFrequency != HyphenationFrequency::None;
325        for (size_t i : range) {
326            const uint16_t c = mTextBuf[i];
327            if (c == CHAR_TAB) {
328                // Fall back to greedy; other modes don't know how to deal with tabs.
329                mStrategy = BreakStrategy::Greedy;
330                // In order to figure out the actual width of the tab, we need to run the greedy
331                // algorithm on all previous text and determine the last line break's preBreak.
332                const ParaWidth lastPreBreak = computeBreaksGreedyPartial(measured, lineWidth);
333                mWidth = lastPreBreak + tabStops.nextTab(mWidth - lastPreBreak);
334                if (mFirstTabIndex == INT_MAX) {
335                    mFirstTabIndex = static_cast<int>(i);
336                }
337                // No need to update afterWord since tab characters can not be an end of word
338                // character in WordBreaker. See the implementation of WordBreaker::wordEnd.
339            } else {
340                if (isWordSpace(c)) {
341                    mSpaceCount += 1;
342                }
343                mWidth += measured.widths[i];
344                if (isLineEndSpace(c)) {
345                    // If we break a line on a line-ending space, that space goes away. So postBreak
346                    // and postSpaceCount, which keep the width and number of spaces if we decide to
347                    // break at this point, don't need to get adjusted.
348                    //
349                    // TODO: handle the rare case of line ending spaces having overhang (it can
350                    // happen for U+1680 OGHAM SPACE MARK).
351                } else {
352                    postBreak = mWidth;
353                    postSpaceCount = mSpaceCount;
354                    hyphenationContextRange.setEnd(i + 1);
355
356                    // TODO: This doesn't work for very tight lines and large overhangs, where the
357                    // overhang from a previous word that may end up on an earline line may be
358                    // considered still in effect for a later word. But that's expected to be very
359                    // rare, so we ignore it for now.
360                    const float forwardOverhang =
361                            isRtl ? measured.overhangs[i].left : measured.overhangs[i].right;
362                    postBreakWithOverhang =
363                            std::max(postBreakWithOverhang, postBreak + forwardOverhang);
364
365                    float backwardOverhang =
366                            isRtl ? measured.overhangs[i].right : measured.overhangs[i].left;
367                    // Adjust backwardOverhang by the advance already seen from the last break.
368                    backwardOverhang -= (mWidth - measured.widths[i]) - lastBreakWidth;
369                    maxBackwardOverhang = std::max(maxBackwardOverhang, backwardOverhang);
370                }
371            }
372            if (i + 1 == current) {  // We are at the end of a word.
373                // We skip breaks for zero-width characters inside replacement spans.
374                const bool addBreak =
375                        canHyphenate || current == range.getEnd() || measured.widths[current] > 0;
376
377                if (addBreak) {
378                    // adjust second overhang for previous breaks
379                    adjustSecondOverhang(maxBackwardOverhang);
380                }
381                if (hyphenate) {
382                    const Range wordRange = mWordBreaker->wordRange();
383                    if (!wordRange.isEmpty() && range.contains(wordRange)) {
384                        addHyphenationCandidates(*run, hyphenationContextRange, wordRange,
385                                                 lastBreakWidth, postBreak, postSpaceCount,
386                                                 hyphenPenalty);
387                    }
388                }
389                if (addBreak) {
390                    const float penalty = hyphenPenalty * mWordBreaker->breakBadness();
391                    // TODO: overhangs may need adjustment at bidi boundaries.
392                    addWordBreak(current, mWidth /* preBreak */, postBreak,
393                                 postBreakWithOverhang - postBreak /* firstOverhang */,
394                                 0.0 /* secondOverhang, to be adjusted later */, mSpaceCount,
395                                 postSpaceCount, penalty, HyphenationType::DONT_BREAK, isRtl);
396                }
397                hyphenationContextRange = Range(current, current);
398                lastBreakWidth = mWidth;
399                maxBackwardOverhang = 0;
400                current = (size_t)mWordBreaker->next();
401            }
402        }
403    }
404}
405
406// Add desperate breaks for the greedy algorithm.
407// Note: these breaks are based on the shaping of the (non-broken) original text; they
408// are imprecise especially in the presence of kerning, ligatures, overhangs, and Arabic shaping.
409void LineBreakerImpl::addDesperateBreaksGreedy(const MeasuredText& measured,
410                                               ParaWidth existingPreBreak, size_t start, size_t end,
411                                               const LineWidth& lineWidth) {
412    ParaWidth width = measured.widths[start];
413    for (size_t i = start + 1; i < end; i++) {
414        const float w = measured.widths[i];
415        if (w > 0) {  // Add desperate breaks only before grapheme clusters.
416            const ParaWidth newWidth = width + w;
417            if (!fitsOnCurrentLine(newWidth, 0.0, 0.0, lineWidth)) {
418                const Candidate& lastGreedyBreak = getLastBreakCandidate();
419                constexpr HyphenationType hyphen = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
420                pushBreak(i, width, computeMaxExtent(measured, lastGreedyBreak.offset, i),
421                          packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType),
422                                         editForThisLine(hyphen)));
423
424                existingPreBreak += width;
425                // Only set the fields that will be later read.
426                mFakeDesperateCandidate.offset = i;
427                mFakeDesperateCandidate.preBreak = existingPreBreak;
428                mFakeDesperateCandidate.secondOverhang = 0.0;
429                mFakeDesperateCandidate.hyphenType = hyphen;
430                mLastGreedyBreakIndex = LAST_BREAK_OFFSET_DESPERATE;
431
432                width = w;
433            } else {
434                width = newWidth;
435            }
436        }
437    }
438}
439
440// Add a word break (possibly for a hyphenated fragment).
441inline void LineBreakerImpl::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
442                                          float firstOverhang, float secondOverhang,
443                                          size_t preSpaceCount, size_t postSpaceCount,
444                                          float penalty, HyphenationType hyph, bool isRtl) {
445    mCandidates.push_back({offset, preBreak, postBreak, firstOverhang, secondOverhang, penalty,
446                           preSpaceCount, postSpaceCount, hyph, isRtl});
447}
448
449// Go back and adjust earlier line breaks if needed.
450void LineBreakerImpl::adjustSecondOverhang(float secondOverhang) {
451    const size_t lastCand = mCandidates.size() - 1;
452    const ParaWidth lastPreBreak = mCandidates[lastCand].preBreak;
453    for (ssize_t i = lastCand; i >= 0; i--) {
454        // "lastPreBreak - mCandidates[i].preBreak" is the amount of difference in mWidth when those
455        // breaks where added. So by subtracting that difference, we are subtracting the difference
456        // in advances in order to find out how much overhang still remains.
457        const float remainingOverhang = secondOverhang - (lastPreBreak - mCandidates[i].preBreak);
458        if (remainingOverhang <= 0.0) {
459            // No more remaining overhang. We don't need to adjust anything anymore.
460            return;
461        }
462        mCandidates[i].secondOverhang = std::max(mCandidates[i].secondOverhang, remainingOverhang);
463    }
464}
465
466// Find the needed extent between the start and end ranges. start is inclusive and end is exclusive.
467// Both are indices of the source string.
468MinikinExtent LineBreakerImpl::computeMaxExtent(const MeasuredText& measured, size_t start,
469                                                size_t end) const {
470    MinikinExtent res = {0.0, 0.0, 0.0};
471    for (size_t j = start; j < end; j++) {
472        res.extendBy(measured.extents[j]);
473    }
474    return res;
475}
476
477void LineBreakerImpl::addGreedyBreak(const MeasuredText& measured, size_t breakIndex) {
478    const Candidate& candidate = mCandidates[breakIndex];
479    const Candidate& lastGreedyBreak = getLastBreakCandidate();
480    pushBreak(candidate.offset, candidate.postBreak - lastGreedyBreak.preBreak,
481              computeMaxExtent(measured, lastGreedyBreak.offset, candidate.offset),
482              packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType),
483                             editForThisLine(candidate.hyphenType)));
484    mLastGreedyBreakIndex = breakIndex;
485}
486
487// Also add desperate breaks if needed (ie when word exceeds current line width).
488void LineBreakerImpl::considerGreedyBreakCandidate(const MeasuredText& measured, size_t candIndex,
489                                                   const LineWidth& lineWidth) {
490    const Candidate* cand = &mCandidates[candIndex];
491    const Candidate* lastGreedyBreak = &getLastBreakCandidate();
492    float leftOverhang, rightOverhang;
493    // TODO: Only works correctly for unidirectional text. Needs changes for bidi text.
494    if (cand->isRtl) {
495        leftOverhang = cand->firstOverhang;
496        rightOverhang = lastGreedyBreak->secondOverhang;
497    } else {
498        leftOverhang = lastGreedyBreak->secondOverhang;
499        rightOverhang = cand->firstOverhang;
500    }
501    while (!fitsOnCurrentLine(cand->postBreak - lastGreedyBreak->preBreak, leftOverhang,
502                              rightOverhang, lineWidth)) {
503        // This break would create an overfull line, pick the best break and break there (greedy).
504        // We do this in a loop, since there's no guarantee that after a break the remaining text
505        // would fit on the next line.
506
507        if (mBestGreedyBreaks.empty()) {
508            // If no break has been found since last break but we are inside this loop, the
509            // section between the last line break and this candidate doesn't fit in the available
510            // space. So we need to consider desperate breaks.
511
512            // Add desperate breaks starting immediately after the last break.
513            addDesperateBreaksGreedy(measured, lastGreedyBreak->preBreak, lastGreedyBreak->offset,
514                                     cand->offset, lineWidth);
515            break;
516        } else {
517            // Break at the best known break.
518            addGreedyBreak(measured, popBestGreedyBreak());
519
520            // addGreedyBreak updates the last break candidate.
521            lastGreedyBreak = &getLastBreakCandidate();
522            if (cand->isRtl) {
523                rightOverhang = lastGreedyBreak->secondOverhang;
524            } else {
525                leftOverhang = lastGreedyBreak->secondOverhang;
526            }
527        }
528    }
529    insertGreedyBreakCandidate(candIndex, cand->penalty);
530}
531
532void LineBreakerImpl::pushBreak(int offset, float width, MinikinExtent extent,
533                                HyphenEdit hyphenEdit) {
534    mBreaks.push_back(offset);
535    mWidths.push_back(width);
536    mAscents.push_back(extent.ascent);
537    mDescents.push_back(extent.descent);
538    const int flags = ((mFirstTabIndex < mBreaks.back()) << kTab_Shift) | hyphenEdit;
539    mFlags.push_back(flags);
540    mFirstTabIndex = INT_MAX;
541}
542
543LineBreakerImpl::ParaWidth LineBreakerImpl::computeBreaksGreedyPartial(const MeasuredText& measured,
544                                                                       const LineWidth& lineWidth) {
545    size_t firstCandidate;
546    if (mLastConsideredGreedyCandidate == SIZE_MAX) {
547        // Clear results and reset greedy line breaker state if we are here for the first time.
548        clearResults();
549        mBestGreedyBreaks.clear();
550        mLastGreedyBreakIndex = 0;
551        mFirstTabIndex = INT_MAX;
552        firstCandidate = 1;
553    } else {
554        firstCandidate = mLastConsideredGreedyCandidate + 1;
555    }
556
557    const size_t lastCandidate = mCandidates.size() - 1;
558    for (size_t cand = firstCandidate; cand <= lastCandidate; cand++) {
559        considerGreedyBreakCandidate(measured, cand, lineWidth);
560    }
561    mLastConsideredGreedyCandidate = lastCandidate;
562    return getLastBreakCandidate().preBreak;
563}
564
565// Get the width of a space. May return 0 if there are no spaces.
566// Note: if there are multiple different widths for spaces (for example, because of mixing of
567// fonts), it's only guaranteed to pick one.
568float LineBreakerImpl::getSpaceWidth(const MeasuredText& measured) const {
569    for (size_t i = 0; i < mTextBuf.size(); i++) {
570        if (isWordSpace(mTextBuf[i])) {
571            return measured.widths[i];
572        }
573    }
574    return 0.0f;
575}
576
577bool LineBreakerImpl::fitsOnCurrentLine(float width, float leftOverhang, float rightOverhang,
578                                        const LineWidth& lineWidth) const {
579    const size_t lineNo = mBreaks.size();
580    const float availableWidth = lineWidth.getAt(lineNo);
581    const float availableLeftPadding = lineWidth.getLeftPaddingAt(lineNo);
582    const float availableRightPadding = lineWidth.getRightPaddingAt(lineNo);
583    const float remainingLeftOverhang = std::max(0.0f, leftOverhang - availableLeftPadding);
584    const float remainingRightOverhang = std::max(0.0f, rightOverhang - availableRightPadding);
585    return width + remainingLeftOverhang + remainingRightOverhang <= availableWidth;
586}
587
588void LineBreakerImpl::computeBreaksGreedy(const MeasuredText& measured,
589                                          const LineWidth& lineWidth) {
590    computeBreaksGreedyPartial(measured, lineWidth);
591    // All breaks but the last have been added by computeBreaksGreedyPartial() already.
592    const Candidate* lastCandidate = &mCandidates.back();
593    if (mCandidates.size() == 1 || mLastGreedyBreakIndex != (mCandidates.size() - 1)) {
594        const Candidate& lastGreedyBreak = getLastBreakCandidate();
595        pushBreak(lastCandidate->offset, lastCandidate->postBreak - lastGreedyBreak.preBreak,
596                  computeMaxExtent(measured, lastGreedyBreak.offset, lastCandidate->offset),
597                  packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType),
598                                 EndHyphenEdit::NO_EDIT));
599        // No need to update mLastGreedyBreakIndex because we're done.
600    }
601}
602
603// Add desperate breaks for the optimal algorithm.
604// Note: these breaks are based on the shaping of the (non-broken) original text; they
605// are imprecise especially in the presence of kerning, ligatures, overhangs, and Arabic shaping.
606void LineBreakerImpl::addDesperateBreaksOptimal(const MeasuredText& measured,
607                                                std::vector<Candidate>* out,
608                                                ParaWidth existingPreBreak, size_t postSpaceCount,
609                                                bool isRtl, size_t start, size_t end) {
610    ParaWidth width = existingPreBreak + measured.widths[start];
611    for (size_t i = start + 1; i < end; i++) {
612        const float w = measured.widths[i];
613        if (w > 0) {  // Add desperate breaks only before grapheme clusters.
614            out->push_back({i /* offset */, width /* preBreak */, width /* postBreak */,
615                            0.0 /* firstOverhang */, 0.0 /* secondOverhang */,
616                            SCORE_DESPERATE /* penalty */,
617                            // postSpaceCount doesn't include trailing spaces.
618                            postSpaceCount /* preSpaceCount */, postSpaceCount /* postSpaceCount */,
619                            HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN /* hyphenType */,
620                            isRtl /* isRtl */});
621            width += w;
622        }
623    }
624}
625
626void LineBreakerImpl::addAllDesperateBreaksOptimal(const MeasuredText& measured,
627                                                   const LineWidth& lineWidth) {
628    const ParaWidth minLineWidth = lineWidth.getMin();
629    size_t firstDesperateIndex = 0;
630    const size_t nCand = mCandidates.size();
631    for (size_t i = 1; i < nCand; i++) {
632        const ParaWidth requiredWidth = mCandidates[i].postBreak - mCandidates[i - 1].preBreak;
633        if (requiredWidth > minLineWidth) {
634            // A desperate break is needed.
635            firstDesperateIndex = i;
636            break;
637        }
638    }
639    if (firstDesperateIndex == 0) {  // No desperate breaks needed.
640        return;
641    }
642
643    // This temporary holds an expanded list of candidates, which will be later copied back into
644    // mCandidates. The beginning of mCandidates, where there are no desparate breaks, is skipped.
645    std::vector<Candidate> expandedCandidates;
646    const size_t nRemainingCandidates = nCand - firstDesperateIndex;
647    expandedCandidates.reserve(nRemainingCandidates + 1);  // At least one more is needed.
648    for (size_t i = firstDesperateIndex; i < nCand; i++) {
649        const Candidate& previousCand = mCandidates[i - 1];
650        const Candidate& thisCand = mCandidates[i];
651        const ParaWidth requiredWidth = thisCand.postBreak - previousCand.preBreak;
652        if (requiredWidth > minLineWidth) {
653            addDesperateBreaksOptimal(measured, &expandedCandidates, previousCand.preBreak,
654                                      thisCand.postSpaceCount, thisCand.isRtl,
655                                      previousCand.offset /* start */, thisCand.offset /* end */);
656        }
657        expandedCandidates.push_back(thisCand);
658    }
659
660    mCandidates.reserve(firstDesperateIndex + expandedCandidates.size());
661    // Iterator to the first candidate to insert from expandedCandidates. The candidates before this
662    // would simply be copied.
663    auto firstToInsert = expandedCandidates.begin() + nRemainingCandidates;
664    // Copy until the end of mCandidates.
665    std::copy(expandedCandidates.begin(), firstToInsert, mCandidates.begin() + firstDesperateIndex);
666    // Insert the rest.
667    mCandidates.insert(mCandidates.end(), firstToInsert, expandedCandidates.end());
668}
669
670// Follow "prev" links in mCandidates array, and copy to result arrays.
671void LineBreakerImpl::finishBreaksOptimal(const MeasuredText& measured,
672                                          const std::vector<OptimalBreaksData>& breaksData) {
673    // clear output vectors.
674    clearResults();
675
676    const size_t nCand = mCandidates.size();
677    size_t prev;
678    for (size_t i = nCand - 1; i > 0; i = prev) {
679        prev = breaksData[i].prev;
680        mBreaks.push_back(mCandidates[i].offset);
681        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
682        MinikinExtent extent =
683                computeMaxExtent(measured, mCandidates[prev].offset, mCandidates[i].offset);
684        mAscents.push_back(extent.ascent);
685        mDescents.push_back(extent.descent);
686
687        const HyphenEdit edit =
688                packHyphenEdit(prev == 0 ? StartHyphenEdit::NO_EDIT
689                                         : editForNextLine(mCandidates[prev].hyphenType),
690                               editForThisLine(mCandidates[i].hyphenType));
691        mFlags.push_back(static_cast<int>(edit));
692    }
693    std::reverse(mBreaks.begin(), mBreaks.end());
694    std::reverse(mWidths.begin(), mWidths.end());
695    std::reverse(mFlags.begin(), mFlags.end());
696}
697
698void LineBreakerImpl::computeBreaksOptimal(const MeasuredText& measured,
699                                           const LineWidth& lineWidth) {
700    size_t active = 0;
701    const size_t nCand = mCandidates.size();
702    const float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth(measured) : 0.0f;
703
704    std::vector<OptimalBreaksData> breaksData;
705    breaksData.reserve(nCand);
706    breaksData.push_back({0.0, 0, 0});  // The first candidate is always at the first line.
707
708    // "i" iterates through candidates for the end of the line.
709    for (size_t i = 1; i < nCand; i++) {
710        const bool atEnd = i == nCand - 1;
711        float best = SCORE_INFTY;
712        size_t bestPrev = 0;
713
714        size_t lineNumberLast = breaksData[active].lineNumber;
715        float width = lineWidth.getAt(lineNumberLast);
716
717        ParaWidth leftEdge = mCandidates[i].postBreak - width;
718        float bestHope = 0;
719
720        // "j" iterates through candidates for the beginning of the line.
721        for (size_t j = active; j < i; j++) {
722            const size_t lineNumber = breaksData[j].lineNumber;
723            if (lineNumber != lineNumberLast) {
724                const float widthNew = lineWidth.getAt(lineNumber);
725                if (widthNew != width) {
726                    leftEdge = mCandidates[i].postBreak - width;
727                    bestHope = 0;
728                    width = widthNew;
729                }
730                lineNumberLast = lineNumber;
731            }
732            const float jScore = breaksData[j].score;
733            if (jScore + bestHope >= best) continue;
734            const float delta = mCandidates[j].preBreak - leftEdge;
735
736            // compute width score for line
737
738            // Note: the "bestHope" optimization makes the assumption that, when delta is
739            // non-negative, widthScore will increase monotonically as successive candidate
740            // breaks are considered.
741            float widthScore = 0.0f;
742            float additionalPenalty = 0.0f;
743            if ((atEnd || !mJustified) && delta < 0) {
744                widthScore = SCORE_OVERFULL;
745            } else if (atEnd && mStrategy != BreakStrategy::Balanced) {
746                // increase penalty for hyphen on last line
747                additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
748            } else {
749                widthScore = delta * delta;
750                if (delta < 0) {
751                    if (-delta < maxShrink * (mCandidates[i].postSpaceCount -
752                                              mCandidates[j].preSpaceCount)) {
753                        widthScore *= SHRINK_PENALTY_MULTIPLIER;
754                    } else {
755                        widthScore = SCORE_OVERFULL;
756                    }
757                }
758            }
759
760            if (delta < 0) {
761                active = j + 1;
762            } else {
763                bestHope = widthScore;
764            }
765
766            const float score = jScore + widthScore + additionalPenalty;
767            if (score <= best) {
768                best = score;
769                bestPrev = j;
770            }
771        }
772        breaksData.push_back({best + mCandidates[i].penalty + mLinePenalty,  // score
773                              bestPrev,                                      // prev
774                              breaksData[bestPrev].lineNumber + 1});         // lineNumber
775    }
776    finishBreaksOptimal(measured, breaksData);
777}
778
779LineBreakResult LineBreakerImpl::computeBreaks(const MeasuredText& measured,
780                                               const LineWidth& lineWidth,
781                                               const TabStops& tabStops) {
782    addRuns(measured, lineWidth, tabStops);
783
784    if (mStrategy == BreakStrategy::Greedy) {
785        computeBreaksGreedy(measured, lineWidth);
786    } else {
787        addAllDesperateBreaksOptimal(measured, lineWidth);
788        computeBreaksOptimal(measured, lineWidth);
789    }
790    LineBreakResult result;
791    result.breakPoints = std::move(mBreaks);
792    result.widths = std::move(mWidths);
793    result.ascents = std::move(mAscents);
794    result.descents = std::move(mDescents);
795    result.flags = std::move(mFlags);
796    return result;
797}
798
799size_t LineBreakerImpl::popBestGreedyBreak() {
800    const size_t bestBreak = mBestGreedyBreaks.front().index;
801    mBestGreedyBreaks.pop_front();
802    return bestBreak;
803}
804
805void LineBreakerImpl::insertGreedyBreakCandidate(size_t index, float penalty) {
806    GreedyBreak gBreak = {index, penalty};
807    if (!mBestGreedyBreaks.empty()) {
808        // Find the location in the queue where the penalty is <= the current penalty, and drop the
809        // elements from there to the end of the queue.
810        auto where = std::lower_bound(mBestGreedyBreaks.begin(), mBestGreedyBreaks.end(), gBreak,
811                                      [](GreedyBreak first, GreedyBreak second) -> bool {
812                                          return first.penalty < second.penalty;
813                                      });
814        if (where != mBestGreedyBreaks.end()) {
815            mBestGreedyBreaks.erase(where, mBestGreedyBreaks.end());
816        }
817    }
818    mBestGreedyBreaks.push_back(gBreak);
819}
820
821LineBreakResult breakIntoLines(const U16StringPiece& textBuffer, BreakStrategy strategy,
822                               HyphenationFrequency frequency, bool justified,
823                               const MeasuredText& measuredText, const LineWidth& lineWidth,
824                               const TabStops& tabStops) {
825    LineBreakerImpl impl(textBuffer, strategy, frequency, justified);
826    return impl.computeBreaks(measuredText, lineWidth, tabStops);
827}
828
829}  // namespace minikin
830