1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define VERBOSE_DEBUG 0
18
19#define LOG_TAG "Minikin"
20
21#include <limits>
22
23#include <log/log.h>
24
25#include "LayoutUtils.h"
26#include <minikin/Layout.h>
27#include <minikin/LineBreaker.h>
28
29using std::vector;
30
31namespace minikin {
32
33const int CHAR_TAB = 0x0009;
34
35// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
36// constants are larger than any reasonable actual width score.
37const float SCORE_INFTY = std::numeric_limits<float>::max();
38const float SCORE_OVERFULL = 1e12f;
39const float SCORE_DESPERATE = 1e10f;
40
41// Multiplier for hyphen penalty on last line.
42const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
43// Penalty assigned to each line break (to try to minimize number of lines)
44// TODO: when we implement full justification (so spaces can shrink and stretch), this is
45// probably not the most appropriate method.
46const float LINE_PENALTY_MULTIPLIER = 2.0f;
47
48// Penalty assigned to shrinking the whitepsace.
49const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
50
51// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
52// unreasonably long words. This is somewhat of a heuristic because extremely long words
53// are possible in some languages. This does mean that very long real words can get
54// broken by desperate breaks, with no hyphens.
55const size_t LONGEST_HYPHENATED_WORD = 45;
56
57// When the text buffer is within this limit, capacity of vectors is retained at finish(),
58// to avoid allocation.
59const size_t MAX_TEXT_BUF_RETAIN = 32678;
60
61// Maximum amount that spaces can shrink, in justified text.
62const float SHRINKABILITY = 1.0 / 3.0;
63
64void LineBreaker::setLocales(const char* locales, const std::vector<Hyphenator*>& hyphenators) {
65    bool goodLocaleFound = false;
66    const ssize_t numLocales = hyphenators.size();
67    // For now, we ignore all locales except the first valid one.
68    // TODO: Support selecting the locale based on the script of the text.
69    const char* localeStart = locales;
70    for (ssize_t i = 0; i < numLocales - 1; i++) { // Loop over all locales, except the last one.
71        const char* localeEnd = strchr(localeStart, ',');
72        const size_t localeNameLength = localeEnd - localeStart;
73        char localeName[localeNameLength + 1];
74        strncpy(localeName, localeStart, localeNameLength);
75        localeName[localeNameLength] = '\0';
76        mLocale = icu::Locale::createFromName(localeName);
77        goodLocaleFound = !mLocale.isBogus();
78        if (goodLocaleFound) {
79            mHyphenator = hyphenators[i];
80            break;
81        } else {
82            localeStart = localeEnd + 1;
83        }
84    }
85    if (!goodLocaleFound) { // Try the last locale.
86        mLocale = icu::Locale::createFromName(localeStart);
87        if (mLocale.isBogus()) {
88            // No good locale.
89            mLocale = icu::Locale::getRoot();
90            mHyphenator = nullptr;
91        } else {
92            mHyphenator = numLocales == 0 ? nullptr : hyphenators[numLocales - 1];
93        }
94    }
95    mWordBreaker.setLocale(mLocale);
96}
97
98void LineBreaker::setText() {
99    mWordBreaker.setText(mTextBuf.data(), mTextBuf.size());
100
101    // handle initial break here because addStyleRun may never be called
102    mWordBreaker.next();
103    mCandidates.clear();
104    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK};
105    mCandidates.push_back(cand);
106
107    // reset greedy breaker state
108    mBreaks.clear();
109    mWidths.clear();
110    mFlags.clear();
111    mLastBreak = 0;
112    mBestBreak = 0;
113    mBestScore = SCORE_INFTY;
114    mPreBreak = 0;
115    mLastHyphenation = HyphenEdit::NO_EDIT;
116    mFirstTabIndex = INT_MAX;
117    mSpaceCount = 0;
118}
119
120void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
121    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
122}
123
124
125void LineBreaker::setIndents(const std::vector<float>& indents) {
126    mLineWidths.setIndents(indents);
127}
128
129// This function determines whether a character is a space that disappears at end of line.
130// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
131// plus '\n'.
132// Note: all such characters are in the BMP, so it's ok to use code units for this.
133static bool isLineEndSpace(uint16_t c) {
134    return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
135            c == 0x205F || c == 0x3000;
136}
137
138// Ordinarily, this method measures the text in the range given. However, when paint
139// is nullptr, it assumes the widths have already been calculated and stored in the
140// width buffer.
141// This method finds the candidate word breaks (using the ICU break iterator) and sends them
142// to addCandidate.
143float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
144        FontStyle style, size_t start, size_t end, bool isRtl) {
145    float width = 0.0f;
146    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
147
148    float hyphenPenalty = 0.0;
149    if (paint != nullptr) {
150        width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags,
151                style, *paint, typeface, mCharWidths.data() + start);
152
153        // a heuristic that seems to perform well
154        hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
155        if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
156            hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
157        }
158
159        if (mJustified) {
160            // Make hyphenation more aggressive for fully justified text (so that "normal" in
161            // justified mode is the same as "full" in ragged-right).
162            hyphenPenalty *= 0.25;
163        } else {
164            // Line penalty is zero for justified text.
165            mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
166        }
167    }
168
169    size_t current = (size_t)mWordBreaker.current();
170    size_t afterWord = start;
171    size_t lastBreak = start;
172    ParaWidth lastBreakWidth = mWidth;
173    ParaWidth postBreak = mWidth;
174    size_t postSpaceCount = mSpaceCount;
175    for (size_t i = start; i < end; i++) {
176        uint16_t c = mTextBuf[i];
177        if (c == CHAR_TAB) {
178            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
179            if (mFirstTabIndex == INT_MAX) {
180                mFirstTabIndex = (int)i;
181            }
182            // fall back to greedy; other modes don't know how to deal with tabs
183            mStrategy = kBreakStrategy_Greedy;
184        } else {
185            if (isWordSpace(c)) mSpaceCount += 1;
186            mWidth += mCharWidths[i];
187            if (!isLineEndSpace(c)) {
188                postBreak = mWidth;
189                postSpaceCount = mSpaceCount;
190                afterWord = i + 1;
191            }
192        }
193        if (i + 1 == current) {
194            size_t wordStart = mWordBreaker.wordStart();
195            size_t wordEnd = mWordBreaker.wordEnd();
196            if (paint != nullptr && mHyphenator != nullptr &&
197                    mHyphenationFrequency != kHyphenationFrequency_None &&
198                    wordStart >= start && wordEnd > wordStart &&
199                    wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
200                mHyphenator->hyphenate(&mHyphBuf,
201                        &mTextBuf[wordStart],
202                        wordEnd - wordStart,
203                        mLocale);
204#if VERBOSE_DEBUG
205                std::string hyphenatedString;
206                for (size_t j = wordStart; j < wordEnd; j++) {
207                    if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) {
208                        hyphenatedString.push_back('-');
209                    }
210                    // Note: only works with ASCII, should do UTF-8 conversion here
211                    hyphenatedString.push_back(buffer()[j]);
212                }
213                ALOGD("hyphenated string: %s", hyphenatedString.c_str());
214#endif
215
216                // measure hyphenated substrings
217                for (size_t j = wordStart; j < wordEnd; j++) {
218                    HyphenationType hyph = mHyphBuf[j - wordStart];
219                    if (hyph != HyphenationType::DONT_BREAK) {
220                        paint->hyphenEdit = HyphenEdit::editForThisLine(hyph);
221                        const float firstPartWidth = Layout::measureText(mTextBuf.data(),
222                                lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style,
223                                *paint, typeface, nullptr);
224                        ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
225
226                        paint->hyphenEdit = HyphenEdit::editForNextLine(hyph);
227                        const float secondPartWidth = Layout::measureText(mTextBuf.data(), j,
228                                afterWord - j, mTextBuf.size(), bidiFlags, style, *paint,
229                                typeface, nullptr);
230                        ParaWidth hyphPreBreak = postBreak - secondPartWidth;
231
232                        addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount,
233                                hyphenPenalty, hyph);
234
235                        paint->hyphenEdit = HyphenEdit::NO_EDIT;
236                    }
237                }
238            }
239
240            // Skip break for zero-width characters inside replacement span
241            if (paint != nullptr || current == end || mCharWidths[current] > 0) {
242                float penalty = hyphenPenalty * mWordBreaker.breakBadness();
243                addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty,
244                        HyphenationType::DONT_BREAK);
245            }
246            lastBreak = current;
247            lastBreakWidth = mWidth;
248            current = (size_t)mWordBreaker.next();
249        }
250    }
251
252    return width;
253}
254
255// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
256// needed (ie when word exceeds current line width)
257void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
258        size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) {
259    Candidate cand;
260    ParaWidth width = mCandidates.back().preBreak;
261    if (postBreak - width > currentLineWidth()) {
262        // Add desperate breaks.
263        // Note: these breaks are based on the shaping of the (non-broken) original text; they
264        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
265        size_t i = mCandidates.back().offset;
266        width += mCharWidths[i++];
267        for (; i < offset; i++) {
268            float w = mCharWidths[i];
269            if (w > 0) {
270                cand.offset = i;
271                cand.preBreak = width;
272                cand.postBreak = width;
273                // postSpaceCount doesn't include trailing spaces
274                cand.preSpaceCount = postSpaceCount;
275                cand.postSpaceCount = postSpaceCount;
276                cand.penalty = SCORE_DESPERATE;
277                cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
278#if VERBOSE_DEBUG
279                ALOGD("desperate cand: %zd %g:%g",
280                        mCandidates.size(), cand.postBreak, cand.preBreak);
281#endif
282                addCandidate(cand);
283                width += w;
284            }
285        }
286    }
287
288    cand.offset = offset;
289    cand.preBreak = preBreak;
290    cand.postBreak = postBreak;
291    cand.penalty = penalty;
292    cand.preSpaceCount = preSpaceCount;
293    cand.postSpaceCount = postSpaceCount;
294    cand.hyphenType = hyph;
295#if VERBOSE_DEBUG
296    ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
297#endif
298    addCandidate(cand);
299}
300
301// Helper method for addCandidate()
302void LineBreaker::pushGreedyBreak() {
303    const Candidate& bestCandidate = mCandidates[mBestBreak];
304    pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak,
305            mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType));
306    mBestScore = SCORE_INFTY;
307#if VERBOSE_DEBUG
308    ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
309#endif
310    mLastBreak = mBestBreak;
311    mPreBreak = bestCandidate.preBreak;
312    mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType);
313}
314
315// TODO performance: could avoid populating mCandidates if greedy only
316void LineBreaker::addCandidate(Candidate cand) {
317    const size_t candIndex = mCandidates.size();
318    mCandidates.push_back(cand);
319
320    // mLastBreak is the index of the last line break we decided to do in mCandidates,
321    // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate
322    // we have found since then, and mBestScore is its penalty.
323    if (cand.postBreak - mPreBreak > currentLineWidth()) {
324        // This break would create an overfull line, pick the best break and break there (greedy)
325        if (mBestBreak == mLastBreak) {
326            // No good break has been found since last break. Break here.
327            mBestBreak = candIndex;
328        }
329        pushGreedyBreak();
330    }
331
332    while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) {
333        // We should rarely come here. But if we are here, we have broken the line, but the
334        // remaining part still doesn't fit. We now need to break at the second best place after the
335        // last break, but we have not kept that information, so we need to go back and find it.
336        //
337        // In some really rare cases, postBreak - preBreak of a candidate itself may be over the
338        // current line width. We protect ourselves against an infinite loop in that case by
339        // checking that we have not broken the line at this candidate already.
340        for (size_t i = mLastBreak + 1; i < candIndex; i++) {
341            const float penalty = mCandidates[i].penalty;
342            if (penalty <= mBestScore) {
343                mBestBreak = i;
344                mBestScore = penalty;
345            }
346        }
347        if (mBestBreak == mLastBreak) {
348            // We didn't find anything good. Break here.
349            mBestBreak = candIndex;
350        }
351        pushGreedyBreak();
352    }
353
354    if (cand.penalty <= mBestScore) {
355        mBestBreak = candIndex;
356        mBestScore = cand.penalty;
357    }
358}
359
360void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) {
361    mBreaks.push_back(offset);
362    mWidths.push_back(width);
363    int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
364    flags |= hyphenEdit;
365    mFlags.push_back(flags);
366    mFirstTabIndex = INT_MAX;
367}
368
369void LineBreaker::addReplacement(size_t start, size_t end, float width) {
370    mCharWidths[start] = width;
371    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
372    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
373}
374
375// Get the width of a space. May return 0 if there are no spaces.
376// Note: if there are multiple different widths for spaces (for example, because of mixing of
377// fonts), it's only guaranteed to pick one.
378float LineBreaker::getSpaceWidth() const {
379    for (size_t i = 0; i < mTextBuf.size(); i++) {
380        if (isWordSpace(mTextBuf[i])) {
381            return mCharWidths[i];
382        }
383    }
384    return 0.0f;
385}
386
387float LineBreaker::currentLineWidth() const {
388    return mLineWidths.getLineWidth(mBreaks.size());
389}
390
391void LineBreaker::computeBreaksGreedy() {
392    // All breaks but the last have been added in addCandidate already.
393    size_t nCand = mCandidates.size();
394    if (nCand == 1 || mLastBreak != nCand - 1) {
395        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak,
396                mLastHyphenation);
397        // don't need to update mBestScore, because we're done
398#if VERBOSE_DEBUG
399        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
400#endif
401    }
402}
403
404// Follow "prev" links in mCandidates array, and copy to result arrays.
405void LineBreaker::finishBreaksOptimal() {
406    // clear existing greedy break result
407    mBreaks.clear();
408    mWidths.clear();
409    mFlags.clear();
410    size_t nCand = mCandidates.size();
411    size_t prev;
412    for (size_t i = nCand - 1; i > 0; i = prev) {
413        prev = mCandidates[i].prev;
414        mBreaks.push_back(mCandidates[i].offset);
415        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
416        int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType);
417        if (prev > 0) {
418            flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType);
419        }
420        mFlags.push_back(flags);
421    }
422    std::reverse(mBreaks.begin(), mBreaks.end());
423    std::reverse(mWidths.begin(), mWidths.end());
424    std::reverse(mFlags.begin(), mFlags.end());
425}
426
427void LineBreaker::computeBreaksOptimal(bool isRectangle) {
428    size_t active = 0;
429    size_t nCand = mCandidates.size();
430    float width = mLineWidths.getLineWidth(0);
431    float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;
432
433    // "i" iterates through candidates for the end of the line.
434    for (size_t i = 1; i < nCand; i++) {
435        bool atEnd = i == nCand - 1;
436        float best = SCORE_INFTY;
437        size_t bestPrev = 0;
438        size_t lineNumberLast = 0;
439
440        if (!isRectangle) {
441            size_t lineNumberLast = mCandidates[active].lineNumber;
442            width = mLineWidths.getLineWidth(lineNumberLast);
443        }
444        ParaWidth leftEdge = mCandidates[i].postBreak - width;
445        float bestHope = 0;
446
447        // "j" iterates through candidates for the beginning of the line.
448        for (size_t j = active; j < i; j++) {
449            if (!isRectangle) {
450                size_t lineNumber = mCandidates[j].lineNumber;
451                if (lineNumber != lineNumberLast) {
452                    float widthNew = mLineWidths.getLineWidth(lineNumber);
453                    if (widthNew != width) {
454                        leftEdge = mCandidates[i].postBreak - width;
455                        bestHope = 0;
456                        width = widthNew;
457                    }
458                    lineNumberLast = lineNumber;
459                }
460            }
461            float jScore = mCandidates[j].score;
462            if (jScore + bestHope >= best) continue;
463            float delta = mCandidates[j].preBreak - leftEdge;
464
465            // compute width score for line
466
467            // Note: the "bestHope" optimization makes the assumption that, when delta is
468            // non-negative, widthScore will increase monotonically as successive candidate
469            // breaks are considered.
470            float widthScore = 0.0f;
471            float additionalPenalty = 0.0f;
472            if ((atEnd || !mJustified) && delta < 0) {
473                widthScore = SCORE_OVERFULL;
474            } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
475                // increase penalty for hyphen on last line
476                additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
477            } else {
478                widthScore = delta * delta;
479                if (delta < 0) {
480                    if (-delta < maxShrink *
481                            (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) {
482                        widthScore *= SHRINK_PENALTY_MULTIPLIER;
483                    } else {
484                        widthScore = SCORE_OVERFULL;
485                    }
486                }
487            }
488
489            if (delta < 0) {
490                active = j + 1;
491            } else {
492                bestHope = widthScore;
493            }
494
495            float score = jScore + widthScore + additionalPenalty;
496            if (score <= best) {
497                best = score;
498                bestPrev = j;
499            }
500        }
501        mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
502        mCandidates[i].prev = bestPrev;
503        mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
504#if VERBOSE_DEBUG
505        ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev);
506#endif
507    }
508    finishBreaksOptimal();
509}
510
511size_t LineBreaker::computeBreaks() {
512    if (mStrategy == kBreakStrategy_Greedy) {
513        computeBreaksGreedy();
514    } else {
515        computeBreaksOptimal(mLineWidths.isConstant());
516    }
517    return mBreaks.size();
518}
519
520void LineBreaker::finish() {
521    mWordBreaker.finish();
522    mWidth = 0;
523    mLineWidths.clear();
524    mCandidates.clear();
525    mBreaks.clear();
526    mWidths.clear();
527    mFlags.clear();
528    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
529        mTextBuf.clear();
530        mTextBuf.shrink_to_fit();
531        mCharWidths.clear();
532        mCharWidths.shrink_to_fit();
533        mHyphBuf.clear();
534        mHyphBuf.shrink_to_fit();
535        mCandidates.shrink_to_fit();
536        mBreaks.shrink_to_fit();
537        mWidths.shrink_to_fit();
538        mFlags.shrink_to_fit();
539    }
540    mStrategy = kBreakStrategy_Greedy;
541    mHyphenationFrequency = kHyphenationFrequency_Normal;
542    mLinePenalty = 0.0f;
543    mJustified = false;
544}
545
546}  // namespace minikin
547