OptimalLineBreaker.cpp revision c9611626465f9e11817854f554e7b7d0c6cee905
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "OptimalLineBreaker.h"
18
19#include <algorithm>
20#include <limits>
21
22#include "minikin/Characters.h"
23#include "minikin/Layout.h"
24#include "minikin/Range.h"
25#include "minikin/U16StringPiece.h"
26
27#include "HyphenatorMap.h"
28#include "LayoutUtils.h"
29#include "LineBreakerUtil.h"
30#include "Locale.h"
31#include "LocaleListCache.h"
32#include "MinikinInternal.h"
33#include "WordBreaker.h"
34
35namespace minikin {
36
37namespace {
38
39// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
40// constants are larger than any reasonable actual width score.
41constexpr float SCORE_INFTY = std::numeric_limits<float>::max();
42constexpr float SCORE_OVERFULL = 1e12f;
43constexpr float SCORE_DESPERATE = 1e10f;
44
45// Multiplier for hyphen penalty on last line.
46constexpr float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
47// Penalty assigned to each line break (to try to minimize number of lines)
48// TODO: when we implement full justification (so spaces can shrink and stretch), this is
49// probably not the most appropriate method.
50constexpr float LINE_PENALTY_MULTIPLIER = 2.0f;
51
52// Penalty assigned to shrinking the whitepsace.
53constexpr float SHRINK_PENALTY_MULTIPLIER = 4.0f;
54
55// Maximum amount that spaces can shrink, in justified text.
56constexpr float SHRINKABILITY = 1.0 / 3.0;
57
58// A single candidate break
59struct Candidate {
60    uint32_t offset;  // offset to text buffer, in code units
61
62    ParaWidth preBreak;       // width of text until this point, if we decide to not break here:
63                              // preBreak is used as an optimized way to calculate the width
64                              // between two candidates. The line width between two line break
65                              // candidates i and j is calculated as postBreak(j) - preBreak(i).
66    ParaWidth postBreak;      // width of text until this point, if we decide to break here
67    float penalty;            // penalty of this break (for example, hyphen penalty)
68    uint32_t preSpaceCount;   // preceding space count before breaking
69    uint32_t postSpaceCount;  // preceding space count after breaking
70    HyphenationType hyphenType;
71    bool isRtl;  // The direction of the bidi run containing or ending in this candidate
72
73    Candidate(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
74              uint32_t preSpaceCount, uint32_t postSpaceCount, HyphenationType hyphenType,
75              bool isRtl)
76            : offset(offset),
77              preBreak(preBreak),
78              postBreak(postBreak),
79              penalty(penalty),
80              preSpaceCount(preSpaceCount),
81              postSpaceCount(postSpaceCount),
82              hyphenType(hyphenType),
83              isRtl(isRtl) {}
84};
85
86// A context of line break optimization.
87struct OptimizeContext {
88    // The break candidates.
89    std::vector<Candidate> candidates;
90
91    // The penalty for the number of lines.
92    float linePenalty = 0.0f;
93
94    // The width of a space. May be 0 if there are no spaces.
95    // Note: if there are multiple different widths for spaces (for example, because of mixing of
96    // fonts), it's only guaranteed to pick one.
97    float spaceWidth = 0.0f;
98
99    // Append desperate break point to the candidates.
100    inline void pushDesperate(uint32_t offset, ParaWidth sumOfCharWidths, uint32_t spaceCount,
101                              bool isRtl) {
102        candidates.emplace_back(offset, sumOfCharWidths, sumOfCharWidths, SCORE_DESPERATE,
103                                spaceCount, spaceCount,
104                                HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, isRtl);
105    }
106
107    // Append hyphenation break point to the candidates.
108    inline void pushHyphenation(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak,
109                                float penalty, uint32_t spaceCount, HyphenationType type,
110                                bool isRtl) {
111        candidates.emplace_back(offset, preBreak, postBreak, penalty, spaceCount, spaceCount, type,
112                                isRtl);
113    }
114
115    // Append word break point to the candidates.
116    inline void pushWordBreak(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak,
117                              float penalty, uint32_t preSpaceCount, uint32_t postSpaceCount,
118                              bool isRtl) {
119        candidates.emplace_back(offset, preBreak, postBreak, penalty, preSpaceCount, postSpaceCount,
120                                HyphenationType::DONT_BREAK, isRtl);
121    }
122
123    OptimizeContext() {
124        candidates.emplace_back(0, 0.0f, 0.0f, 0.0f, 0, 0, HyphenationType::DONT_BREAK, false);
125    }
126};
127
128// Compute the penalty for the run and returns penalty for hyphenation and number of lines.
129std::pair<float, float> computePenalties(const Run& run, const LineWidth& lineWidth,
130                                         HyphenationFrequency frequency, bool justified) {
131    float linePenalty = 0.0;
132    const MinikinPaint* paint = run.getPaint();
133    // a heuristic that seems to perform well
134    float hyphenPenalty = 0.5 * paint->size * paint->scaleX * lineWidth.getAt(0);
135    if (frequency == HyphenationFrequency::Normal) {
136        hyphenPenalty *= 4.0;  // TODO: Replace with a better value after some testing
137    }
138
139    if (justified) {
140        // Make hyphenation more aggressive for fully justified text (so that "normal" in
141        // justified mode is the same as "full" in ragged-right).
142        hyphenPenalty *= 0.25;
143    } else {
144        // Line penalty is zero for justified text.
145        linePenalty = hyphenPenalty * LINE_PENALTY_MULTIPLIER;
146    }
147
148    return std::make_pair(hyphenPenalty, linePenalty);
149}
150
151// Represents a desperate break point.
152struct DesperateBreak {
153    // The break offset.
154    uint32_t offset;
155
156    // The sum of the character width from the beginning of the word.
157    ParaWidth sumOfChars;
158
159    DesperateBreak(uint32_t offset, ParaWidth sumOfChars)
160            : offset(offset), sumOfChars(sumOfChars){};
161};
162
163// Retrieves desperate break points from a word.
164std::vector<DesperateBreak> populateDesperatePoints(const MeasuredText& measured,
165                                                    const Range& range) {
166    std::vector<DesperateBreak> out;
167    ParaWidth width = measured.widths[range.getStart()];
168    for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
169        const float w = measured.widths[i];
170        if (w == 0) {
171            continue;  // w == 0 means here is not a grapheme bounds. Don't break here.
172        }
173        out.emplace_back(i, width);
174        width += w;
175    }
176    return out;
177}
178
179// Append hyphenation break points and desperate break points.
180// If an offset is a both candidate for hyphenation and desperate break points, place desperate
181// break candidate first and hyphenation break points second since the result width of the desperate
182// break is shorter than hyphenation break.
183// This is important since DP in computeBreaksOptimal assumes that the result line width is
184// increased by break offset.
185void appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter,
186                       std::vector<HyphenBreak>::const_iterator endHyIter,
187                       const std::vector<DesperateBreak>& desperates, const CharProcessor& proc,
188                       float hyphenPenalty, bool isRtl, OptimizeContext* out) {
189    auto d = desperates.begin();
190    while (hyIter != endHyIter || d != desperates.end()) {
191        // If both hyphen breaks and desperate breaks point to the same offset, push desperate
192        // breaks first.
193        if (d != desperates.end() && (hyIter == endHyIter || d->offset <= hyIter->offset)) {
194            out->pushDesperate(d->offset, proc.sumOfCharWidthsAtPrevWordBreak + d->sumOfChars,
195                               proc.effectiveSpaceCount, isRtl);
196            d++;
197        } else {
198            out->pushHyphenation(hyIter->offset, proc.sumOfCharWidths - hyIter->second,
199                                 proc.sumOfCharWidthsAtPrevWordBreak + hyIter->first, hyphenPenalty,
200                                 proc.effectiveSpaceCount, hyIter->type, isRtl);
201            hyIter++;
202        }
203    }
204}
205
206// Enumerate all line break candidates.
207OptimizeContext populateCandidates(const U16StringPiece& textBuf, const MeasuredText& measured,
208                                   const LineWidth& lineWidth, HyphenationFrequency frequency,
209                                   bool isJustified) {
210    const ParaWidth minLineWidth = lineWidth.getMin();
211    CharProcessor proc(textBuf);
212
213    OptimizeContext result;
214
215    const bool doHyphenation = frequency != HyphenationFrequency::None;
216    auto hyIter = std::begin(measured.hyphenBreaks);
217
218    for (const auto& run : measured.runs) {
219        const bool isRtl = run->isRtl();
220        const Range& range = run->getRange();
221
222        // Compute penalty parameters.
223        float hyphenPenalty = 0.0f;
224        if (run->canHyphenate()) {
225            auto penalties = computePenalties(*run, lineWidth, frequency, isJustified);
226            hyphenPenalty = penalties.first;
227            result.linePenalty = std::max(penalties.second, result.linePenalty);
228        }
229
230        proc.updateLocaleIfNecessary(*run);
231
232        for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
233            MINIKIN_ASSERT(textBuf[i] != CHAR_TAB, "TAB is not supported in optimal line breaker");
234            proc.feedChar(i, textBuf[i], measured.widths[i]);
235
236            const uint32_t nextCharOffset = i + 1;
237            if (nextCharOffset != proc.nextWordBreak) {
238                continue;  // Wait until word break point.
239            }
240
241            // Add hyphenation and desperate break points.
242            std::vector<HyphenBreak> hyphenedBreaks;
243            std::vector<DesperateBreak> desperateBreaks;
244            const Range contextRange = proc.contextRange();
245
246            auto beginHyIter = hyIter;
247            while (hyIter != std::end(measured.hyphenBreaks) &&
248                   hyIter->offset < contextRange.getEnd()) {
249                hyIter++;
250            }
251            if (proc.widthFromLastWordBreak() > minLineWidth) {
252                desperateBreaks = populateDesperatePoints(measured, contextRange);
253            }
254            appendWithMerging(beginHyIter, doHyphenation ? hyIter : beginHyIter, desperateBreaks,
255                              proc, hyphenPenalty, isRtl, &result);
256
257            // We skip breaks for zero-width characters inside replacement spans.
258            if (nextCharOffset == range.getEnd() || measured.widths[nextCharOffset] > 0) {
259                const float penalty = hyphenPenalty * proc.wordBreakPenalty();
260                result.pushWordBreak(nextCharOffset, proc.sumOfCharWidths, proc.effectiveWidth,
261                                     penalty, proc.rawSpaceCount, proc.effectiveSpaceCount, isRtl);
262            }
263        }
264    }
265    result.spaceWidth = proc.spaceWidth;
266    return result;
267}
268
269class LineBreakOptimizer {
270public:
271    LineBreakOptimizer() {}
272
273    LineBreakResult computeBreaks(const OptimizeContext& context, const MeasuredText& measuredText,
274                                  const LineWidth& lineWidth, BreakStrategy strategy,
275                                  bool justified);
276
277private:
278    // Data used to compute optimal line breaks
279    struct OptimalBreaksData {
280        float score;          // best score found for this break
281        uint32_t prev;        // index to previous break
282        uint32_t lineNumber;  // the computed line number of the candidate
283    };
284    LineBreakResult finishBreaksOptimal(const MeasuredText& measured,
285                                        const std::vector<OptimalBreaksData>& breaksData,
286                                        const std::vector<Candidate>& candidates);
287
288    MinikinExtent computeMaxExtent(const MeasuredText& measured, uint32_t start,
289                                   uint32_t end) const;
290};
291
292// Find the needed extent between the start and end ranges. start is inclusive and end is exclusive.
293// Both are indices of the source string.
294MinikinExtent LineBreakOptimizer::computeMaxExtent(const MeasuredText& measured, uint32_t start,
295                                                   uint32_t end) const {
296    MinikinExtent res = {0.0, 0.0, 0.0};
297    for (uint32_t j = start; j < end; j++) {
298        res.extendBy(measured.extents[j]);
299    }
300    return res;
301}
302
303// Follow "prev" links in candidates array, and copy to result arrays.
304LineBreakResult LineBreakOptimizer::finishBreaksOptimal(
305        const MeasuredText& measured, const std::vector<OptimalBreaksData>& breaksData,
306        const std::vector<Candidate>& candidates) {
307    LineBreakResult result;
308    const uint32_t nCand = candidates.size();
309    uint32_t prevIndex;
310    for (uint32_t i = nCand - 1; i > 0; i = prevIndex) {
311        prevIndex = breaksData[i].prev;
312        const Candidate& cand = candidates[i];
313        const Candidate& prev = candidates[prevIndex];
314
315        result.breakPoints.push_back(cand.offset);
316        result.widths.push_back(cand.postBreak - prev.preBreak);
317        MinikinExtent extent = computeMaxExtent(measured, prev.offset, cand.offset);
318        result.ascents.push_back(extent.ascent);
319        result.descents.push_back(extent.descent);
320
321        const HyphenEdit edit =
322                packHyphenEdit(editForNextLine(prev.hyphenType), editForThisLine(cand.hyphenType));
323        result.flags.push_back(static_cast<int>(edit));
324    }
325    result.reverse();
326    return result;
327}
328
329LineBreakResult LineBreakOptimizer::computeBreaks(const OptimizeContext& context,
330                                                  const MeasuredText& measured,
331                                                  const LineWidth& lineWidth,
332                                                  BreakStrategy strategy, bool justified) {
333    const std::vector<Candidate>& candidates = context.candidates;
334    uint32_t active = 0;
335    const uint32_t nCand = candidates.size();
336    const float maxShrink = justified ? SHRINKABILITY * context.spaceWidth : 0.0f;
337
338    std::vector<OptimalBreaksData> breaksData;
339    breaksData.reserve(nCand);
340    breaksData.push_back({0.0, 0, 0});  // The first candidate is always at the first line.
341
342    // "i" iterates through candidates for the end of the line.
343    for (uint32_t i = 1; i < nCand; i++) {
344        const bool atEnd = i == nCand - 1;
345        float best = SCORE_INFTY;
346        uint32_t bestPrev = 0;
347
348        uint32_t lineNumberLast = breaksData[active].lineNumber;
349        float width = lineWidth.getAt(lineNumberLast);
350
351        ParaWidth leftEdge = candidates[i].postBreak - width;
352        float bestHope = 0;
353
354        // "j" iterates through candidates for the beginning of the line.
355        for (uint32_t j = active; j < i; j++) {
356            const uint32_t lineNumber = breaksData[j].lineNumber;
357            if (lineNumber != lineNumberLast) {
358                const float widthNew = lineWidth.getAt(lineNumber);
359                if (widthNew != width) {
360                    leftEdge = candidates[i].postBreak - width;
361                    bestHope = 0;
362                    width = widthNew;
363                }
364                lineNumberLast = lineNumber;
365            }
366            const float jScore = breaksData[j].score;
367            if (jScore + bestHope >= best) continue;
368            const float delta = candidates[j].preBreak - leftEdge;
369
370            // compute width score for line
371
372            // Note: the "bestHope" optimization makes the assumption that, when delta is
373            // non-negative, widthScore will increase monotonically as successive candidate
374            // breaks are considered.
375            float widthScore = 0.0f;
376            float additionalPenalty = 0.0f;
377            if ((atEnd || !justified) && delta < 0) {
378                widthScore = SCORE_OVERFULL;
379            } else if (atEnd && strategy != BreakStrategy::Balanced) {
380                // increase penalty for hyphen on last line
381                additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * candidates[j].penalty;
382            } else {
383                widthScore = delta * delta;
384                if (delta < 0) {
385                    if (-delta <
386                        maxShrink * (candidates[i].postSpaceCount - candidates[j].preSpaceCount)) {
387                        widthScore *= SHRINK_PENALTY_MULTIPLIER;
388                    } else {
389                        widthScore = SCORE_OVERFULL;
390                    }
391                }
392            }
393
394            if (delta < 0) {
395                active = j + 1;
396            } else {
397                bestHope = widthScore;
398            }
399
400            const float score = jScore + widthScore + additionalPenalty;
401            if (score <= best) {
402                best = score;
403                bestPrev = j;
404            }
405        }
406        breaksData.push_back({best + candidates[i].penalty + context.linePenalty,  // score
407                              bestPrev,                                            // prev
408                              breaksData[bestPrev].lineNumber + 1});               // lineNumber
409    }
410    return finishBreaksOptimal(measured, breaksData, candidates);
411}
412
413}  // namespace
414
415LineBreakResult breakLineOptimal(const U16StringPiece& textBuf, const MeasuredText& measured,
416                                 const LineWidth& lineWidth, BreakStrategy strategy,
417                                 HyphenationFrequency frequency, bool justified) {
418    if (textBuf.size() == 0) {
419        return LineBreakResult();
420    }
421    const OptimizeContext context =
422            populateCandidates(textBuf, measured, lineWidth, frequency, justified);
423    LineBreakOptimizer optimizer;
424    return optimizer.computeBreaks(context, measured, lineWidth, strategy, justified);
425}
426
427}  // namespace minikin
428