1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define VERBOSE_DEBUG 0
18
19#define LOG_TAG "Minikin"
20
21#include <limits>
22
23#include <log/log.h>
24
25#include "LayoutUtils.h"
26#include <minikin/Layout.h>
27#include <minikin/LineBreaker.h>
28
29using std::vector;
30
31namespace minikin {
32
33const int CHAR_TAB = 0x0009;
34
35// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
36// constants are larger than any reasonable actual width score.
37const float SCORE_INFTY = std::numeric_limits<float>::max();
38const float SCORE_OVERFULL = 1e12f;
39const float SCORE_DESPERATE = 1e10f;
40
41// Multiplier for hyphen penalty on last line.
42const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
43// Penalty assigned to each line break (to try to minimize number of lines)
44// TODO: when we implement full justification (so spaces can shrink and stretch), this is
45// probably not the most appropriate method.
46const float LINE_PENALTY_MULTIPLIER = 2.0f;
47
48// Penalty assigned to shrinking the whitepsace.
49const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
50
51// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
52// unreasonably long words. This is somewhat of a heuristic because extremely long words
53// are possible in some languages. This does mean that very long real words can get
54// broken by desperate breaks, with no hyphens.
55const size_t LONGEST_HYPHENATED_WORD = 45;
56
57// When the text buffer is within this limit, capacity of vectors is retained at finish(),
58// to avoid allocation.
59const size_t MAX_TEXT_BUF_RETAIN = 32678;
60
61// Maximum amount that spaces can shrink, in justified text.
62const float SHRINKABILITY = 1.0 / 3.0;
63
64void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
65    mWordBreaker.setLocale(locale);
66    mLocale = locale;
67    mHyphenator = hyphenator;
68}
69
70void LineBreaker::setText() {
71    mWordBreaker.setText(mTextBuf.data(), mTextBuf.size());
72
73    // handle initial break here because addStyleRun may never be called
74    mWordBreaker.next();
75    mCandidates.clear();
76    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK};
77    mCandidates.push_back(cand);
78
79    // reset greedy breaker state
80    mBreaks.clear();
81    mWidths.clear();
82    mFlags.clear();
83    mLastBreak = 0;
84    mBestBreak = 0;
85    mBestScore = SCORE_INFTY;
86    mPreBreak = 0;
87    mLastHyphenation = HyphenEdit::NO_EDIT;
88    mFirstTabIndex = INT_MAX;
89    mSpaceCount = 0;
90}
91
92void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
93    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
94}
95
96
97void LineBreaker::setIndents(const std::vector<float>& indents) {
98    mLineWidths.setIndents(indents);
99}
100
101// This function determines whether a character is a space that disappears at end of line.
102// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
103// plus '\n'.
104// Note: all such characters are in the BMP, so it's ok to use code units for this.
105static bool isLineEndSpace(uint16_t c) {
106    return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
107            c == 0x205F || c == 0x3000;
108}
109
110// Ordinarily, this method measures the text in the range given. However, when paint
111// is nullptr, it assumes the widths have already been calculated and stored in the
112// width buffer.
113// This method finds the candidate word breaks (using the ICU break iterator) and sends them
114// to addCandidate.
115float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
116        FontStyle style, size_t start, size_t end, bool isRtl) {
117    float width = 0.0f;
118    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
119
120    float hyphenPenalty = 0.0;
121    if (paint != nullptr) {
122        width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags,
123                style, *paint, typeface, mCharWidths.data() + start);
124
125        // a heuristic that seems to perform well
126        hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
127        if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
128            hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
129        }
130
131        if (mJustified) {
132            // Make hyphenation more aggressive for fully justified text (so that "normal" in
133            // justified mode is the same as "full" in ragged-right).
134            hyphenPenalty *= 0.25;
135        } else {
136            // Line penalty is zero for justified text.
137            mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
138        }
139    }
140
141    size_t current = (size_t)mWordBreaker.current();
142    size_t afterWord = start;
143    size_t lastBreak = start;
144    ParaWidth lastBreakWidth = mWidth;
145    ParaWidth postBreak = mWidth;
146    size_t postSpaceCount = mSpaceCount;
147    for (size_t i = start; i < end; i++) {
148        uint16_t c = mTextBuf[i];
149        if (c == CHAR_TAB) {
150            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
151            if (mFirstTabIndex == INT_MAX) {
152                mFirstTabIndex = (int)i;
153            }
154            // fall back to greedy; other modes don't know how to deal with tabs
155            mStrategy = kBreakStrategy_Greedy;
156        } else {
157            if (isWordSpace(c)) mSpaceCount += 1;
158            mWidth += mCharWidths[i];
159            if (!isLineEndSpace(c)) {
160                postBreak = mWidth;
161                postSpaceCount = mSpaceCount;
162                afterWord = i + 1;
163            }
164        }
165        if (i + 1 == current) {
166            size_t wordStart = mWordBreaker.wordStart();
167            size_t wordEnd = mWordBreaker.wordEnd();
168            if (paint != nullptr && mHyphenator != nullptr &&
169                    mHyphenationFrequency != kHyphenationFrequency_None &&
170                    wordStart >= start && wordEnd > wordStart &&
171                    wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
172                mHyphenator->hyphenate(&mHyphBuf,
173                        &mTextBuf[wordStart],
174                        wordEnd - wordStart,
175                        mLocale);
176#if VERBOSE_DEBUG
177                std::string hyphenatedString;
178                for (size_t j = wordStart; j < wordEnd; j++) {
179                    if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) {
180                        hyphenatedString.push_back('-');
181                    }
182                    // Note: only works with ASCII, should do UTF-8 conversion here
183                    hyphenatedString.push_back(buffer()[j]);
184                }
185                ALOGD("hyphenated string: %s", hyphenatedString.c_str());
186#endif
187
188                // measure hyphenated substrings
189                for (size_t j = wordStart; j < wordEnd; j++) {
190                    HyphenationType hyph = mHyphBuf[j - wordStart];
191                    if (hyph != HyphenationType::DONT_BREAK) {
192                        paint->hyphenEdit = HyphenEdit::editForThisLine(hyph);
193                        const float firstPartWidth = Layout::measureText(mTextBuf.data(),
194                                lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style,
195                                *paint, typeface, nullptr);
196                        ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
197
198                        paint->hyphenEdit = HyphenEdit::editForNextLine(hyph);
199                        const float secondPartWidth = Layout::measureText(mTextBuf.data(), j,
200                                afterWord - j, mTextBuf.size(), bidiFlags, style, *paint,
201                                typeface, nullptr);
202                        ParaWidth hyphPreBreak = postBreak - secondPartWidth;
203
204                        addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount,
205                                hyphenPenalty, hyph);
206
207                        paint->hyphenEdit = HyphenEdit::NO_EDIT;
208                    }
209                }
210            }
211
212            // Skip break for zero-width characters inside replacement span
213            if (paint != nullptr || current == end || mCharWidths[current] > 0) {
214                float penalty = hyphenPenalty * mWordBreaker.breakBadness();
215                addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty,
216                        HyphenationType::DONT_BREAK);
217            }
218            lastBreak = current;
219            lastBreakWidth = mWidth;
220            current = (size_t)mWordBreaker.next();
221        }
222    }
223
224    return width;
225}
226
227// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
228// needed (ie when word exceeds current line width)
229void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
230        size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) {
231    Candidate cand;
232    ParaWidth width = mCandidates.back().preBreak;
233    if (postBreak - width > currentLineWidth()) {
234        // Add desperate breaks.
235        // Note: these breaks are based on the shaping of the (non-broken) original text; they
236        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
237        size_t i = mCandidates.back().offset;
238        width += mCharWidths[i++];
239        for (; i < offset; i++) {
240            float w = mCharWidths[i];
241            if (w > 0) {
242                cand.offset = i;
243                cand.preBreak = width;
244                cand.postBreak = width;
245                // postSpaceCount doesn't include trailing spaces
246                cand.preSpaceCount = postSpaceCount;
247                cand.postSpaceCount = postSpaceCount;
248                cand.penalty = SCORE_DESPERATE;
249                cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
250#if VERBOSE_DEBUG
251                ALOGD("desperate cand: %zd %g:%g",
252                        mCandidates.size(), cand.postBreak, cand.preBreak);
253#endif
254                addCandidate(cand);
255                width += w;
256            }
257        }
258    }
259
260    cand.offset = offset;
261    cand.preBreak = preBreak;
262    cand.postBreak = postBreak;
263    cand.penalty = penalty;
264    cand.preSpaceCount = preSpaceCount;
265    cand.postSpaceCount = postSpaceCount;
266    cand.hyphenType = hyph;
267#if VERBOSE_DEBUG
268    ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
269#endif
270    addCandidate(cand);
271}
272
273// Helper method for addCandidate()
274void LineBreaker::pushGreedyBreak() {
275    const Candidate& bestCandidate = mCandidates[mBestBreak];
276    pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak,
277            mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType));
278    mBestScore = SCORE_INFTY;
279#if VERBOSE_DEBUG
280    ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
281#endif
282    mLastBreak = mBestBreak;
283    mPreBreak = bestCandidate.preBreak;
284    mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType);
285}
286
287// TODO performance: could avoid populating mCandidates if greedy only
288void LineBreaker::addCandidate(Candidate cand) {
289    const size_t candIndex = mCandidates.size();
290    mCandidates.push_back(cand);
291
292    // mLastBreak is the index of the last line break we decided to do in mCandidates,
293    // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate
294    // we have found since then, and mBestScore is its penalty.
295    if (cand.postBreak - mPreBreak > currentLineWidth()) {
296        // This break would create an overfull line, pick the best break and break there (greedy)
297        if (mBestBreak == mLastBreak) {
298            // No good break has been found since last break. Break here.
299            mBestBreak = candIndex;
300        }
301        pushGreedyBreak();
302    }
303
304    while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) {
305        // We should rarely come here. But if we are here, we have broken the line, but the
306        // remaining part still doesn't fit. We now need to break at the second best place after the
307        // last break, but we have not kept that information, so we need to go back and find it.
308        //
309        // In some really rare cases, postBreak - preBreak of a candidate itself may be over the
310        // current line width. We protect ourselves against an infinite loop in that case by
311        // checking that we have not broken the line at this candidate already.
312        for (size_t i = mLastBreak + 1; i < candIndex; i++) {
313            const float penalty = mCandidates[i].penalty;
314            if (penalty <= mBestScore) {
315                mBestBreak = i;
316                mBestScore = penalty;
317            }
318        }
319        if (mBestBreak == mLastBreak) {
320            // We didn't find anything good. Break here.
321            mBestBreak = candIndex;
322        }
323        pushGreedyBreak();
324    }
325
326    if (cand.penalty <= mBestScore) {
327        mBestBreak = candIndex;
328        mBestScore = cand.penalty;
329    }
330}
331
332void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) {
333    mBreaks.push_back(offset);
334    mWidths.push_back(width);
335    int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
336    flags |= hyphenEdit;
337    mFlags.push_back(flags);
338    mFirstTabIndex = INT_MAX;
339}
340
341void LineBreaker::addReplacement(size_t start, size_t end, float width) {
342    mCharWidths[start] = width;
343    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
344    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
345}
346
347// Get the width of a space. May return 0 if there are no spaces.
348// Note: if there are multiple different widths for spaces (for example, because of mixing of
349// fonts), it's only guaranteed to pick one.
350float LineBreaker::getSpaceWidth() const {
351    for (size_t i = 0; i < mTextBuf.size(); i++) {
352        if (isWordSpace(mTextBuf[i])) {
353            return mCharWidths[i];
354        }
355    }
356    return 0.0f;
357}
358
359float LineBreaker::currentLineWidth() const {
360    return mLineWidths.getLineWidth(mBreaks.size());
361}
362
363void LineBreaker::computeBreaksGreedy() {
364    // All breaks but the last have been added in addCandidate already.
365    size_t nCand = mCandidates.size();
366    if (nCand == 1 || mLastBreak != nCand - 1) {
367        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak,
368                mLastHyphenation);
369        // don't need to update mBestScore, because we're done
370#if VERBOSE_DEBUG
371        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
372#endif
373    }
374}
375
376// Follow "prev" links in mCandidates array, and copy to result arrays.
377void LineBreaker::finishBreaksOptimal() {
378    // clear existing greedy break result
379    mBreaks.clear();
380    mWidths.clear();
381    mFlags.clear();
382    size_t nCand = mCandidates.size();
383    size_t prev;
384    for (size_t i = nCand - 1; i > 0; i = prev) {
385        prev = mCandidates[i].prev;
386        mBreaks.push_back(mCandidates[i].offset);
387        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
388        int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType);
389        if (prev > 0) {
390            flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType);
391        }
392        mFlags.push_back(flags);
393    }
394    std::reverse(mBreaks.begin(), mBreaks.end());
395    std::reverse(mWidths.begin(), mWidths.end());
396    std::reverse(mFlags.begin(), mFlags.end());
397}
398
399void LineBreaker::computeBreaksOptimal(bool isRectangle) {
400    size_t active = 0;
401    size_t nCand = mCandidates.size();
402    float width = mLineWidths.getLineWidth(0);
403    float shortLineFactor = mJustified ? 0.75f : 0.5f;
404    float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;
405
406    // "i" iterates through candidates for the end of the line.
407    for (size_t i = 1; i < nCand; i++) {
408        bool atEnd = i == nCand - 1;
409        float best = SCORE_INFTY;
410        size_t bestPrev = 0;
411        size_t lineNumberLast = 0;
412
413        if (!isRectangle) {
414            size_t lineNumberLast = mCandidates[active].lineNumber;
415            width = mLineWidths.getLineWidth(lineNumberLast);
416        }
417        ParaWidth leftEdge = mCandidates[i].postBreak - width;
418        float bestHope = 0;
419
420        // "j" iterates through candidates for the beginning of the line.
421        for (size_t j = active; j < i; j++) {
422            if (!isRectangle) {
423                size_t lineNumber = mCandidates[j].lineNumber;
424                if (lineNumber != lineNumberLast) {
425                    float widthNew = mLineWidths.getLineWidth(lineNumber);
426                    if (widthNew != width) {
427                        leftEdge = mCandidates[i].postBreak - width;
428                        bestHope = 0;
429                        width = widthNew;
430                    }
431                    lineNumberLast = lineNumber;
432                }
433            }
434            float jScore = mCandidates[j].score;
435            if (jScore + bestHope >= best) continue;
436            float delta = mCandidates[j].preBreak - leftEdge;
437
438            // compute width score for line
439
440            // Note: the "bestHope" optimization makes the assumption that, when delta is
441            // non-negative, widthScore will increase monotonically as successive candidate
442            // breaks are considered.
443            float widthScore = 0.0f;
444            float additionalPenalty = 0.0f;
445            if ((atEnd || !mJustified) && delta < 0) {
446                widthScore = SCORE_OVERFULL;
447            } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
448                // increase penalty for hyphen on last line
449                additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
450                // Penalize very short (< 1 - shortLineFactor of total width) lines.
451                float underfill = delta - shortLineFactor * width;
452                widthScore = underfill > 0 ? underfill * underfill : 0;
453            } else {
454                widthScore = delta * delta;
455                if (delta < 0) {
456                    if (-delta < maxShrink *
457                            (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) {
458                        widthScore *= SHRINK_PENALTY_MULTIPLIER;
459                    } else {
460                        widthScore = SCORE_OVERFULL;
461                    }
462                }
463            }
464
465            if (delta < 0) {
466                active = j + 1;
467            } else {
468                bestHope = widthScore;
469            }
470
471            float score = jScore + widthScore + additionalPenalty;
472            if (score <= best) {
473                best = score;
474                bestPrev = j;
475            }
476        }
477        mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
478        mCandidates[i].prev = bestPrev;
479        mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
480#if VERBOSE_DEBUG
481        ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev);
482#endif
483    }
484    finishBreaksOptimal();
485}
486
487size_t LineBreaker::computeBreaks() {
488    if (mStrategy == kBreakStrategy_Greedy) {
489        computeBreaksGreedy();
490    } else {
491        computeBreaksOptimal(mLineWidths.isConstant());
492    }
493    return mBreaks.size();
494}
495
496void LineBreaker::finish() {
497    mWordBreaker.finish();
498    mWidth = 0;
499    mLineWidths.clear();
500    mCandidates.clear();
501    mBreaks.clear();
502    mWidths.clear();
503    mFlags.clear();
504    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
505        mTextBuf.clear();
506        mTextBuf.shrink_to_fit();
507        mCharWidths.clear();
508        mCharWidths.shrink_to_fit();
509        mHyphBuf.clear();
510        mHyphBuf.shrink_to_fit();
511        mCandidates.shrink_to_fit();
512        mBreaks.shrink_to_fit();
513        mWidths.shrink_to_fit();
514        mFlags.shrink_to_fit();
515    }
516    mStrategy = kBreakStrategy_Greedy;
517    mHyphenationFrequency = kHyphenationFrequency_Normal;
518    mLinePenalty = 0.0f;
519    mJustified = false;
520}
521
522}  // namespace minikin
523