1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef MINIKIN_LINE_BREAKER_UTIL_H
18#define MINIKIN_LINE_BREAKER_UTIL_H
19
20#include <vector>
21
22#include "minikin/Hyphenator.h"
23#include "minikin/MeasuredText.h"
24#include "minikin/U16StringPiece.h"
25
26#include "HyphenatorMap.h"
27#include "LayoutUtils.h"
28#include "Locale.h"
29#include "LocaleListCache.h"
30#include "MinikinInternal.h"
31#include "WordBreaker.h"
32
33namespace minikin {
34
// ParaWidth holds a cumulative width measured from the beginning of the paragraph. For very large
// paragraphs a 32-bit float may lose accuracy, although float is used extensively on the Java
// side for this. The type is kept behind an alias so that it can easily be changed based on the
// performance/accuracy tradeoff.
using ParaWidth = double;
40
41// Hyphenates a string potentially containing non-breaking spaces.
42std::vector<HyphenationType> hyphenate(const U16StringPiece& string, const Hyphenator& hypenator);
43
// Tells whether a character is a space that disappears at end of line.
// The accepted characters are the Unicode set
// [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], plus '\n'.
// Note: all such characters are in the BMP, so it's ok to use code units for this.
inline bool isLineEndSpace(uint16_t c) {
    switch (c) {
        case '\n':
        case ' ':     // SPACE
        case 0x1680:  // OGHAM SPACE MARK
        case 0x205F:  // MEDIUM MATHEMATICAL SPACE
        case 0x3000:  // IDEOGRAPHIC SPACE
            return true;
        case 0x2007:  // FIGURE SPACE: inside the range below, but explicitly excluded.
            return false;
        default:
            // EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM SPACE,
            // SIX-PER-EM SPACE, PUNCTUATION SPACE, THIN SPACE, HAIR SPACE
            return c >= 0x2000 && c <= 0x200A;
    }
}
57
58// Returns true if the character needs to be excluded for the line spacing.
59inline bool isLineSpaceExcludeChar(uint16_t c) {
60    return c == CHAR_LINE_FEED || c == CHAR_CARRIAGE_RETURN;
61}
62
63inline Locale getEffectiveLocale(uint32_t localeListId) {
64    const LocaleList& localeList = LocaleListCache::getById(localeListId);
65    return localeList.empty() ? Locale() : localeList[0];
66}
67
68// Retrieves hyphenation break points from a word.
69inline void populateHyphenationPoints(
70        const U16StringPiece& textBuf,        // A text buffer.
71        const Run& run,                       // A run of this region.
72        const Hyphenator& hyphenator,         // A hyphenator to be used for hyphenation.
73        const Range& contextRange,            // A context range for measuring hyphenated piece.
74        const Range& hyphenationTargetRange,  // An actual range for the hyphenation target.
75        std::vector<HyphenBreak>* out,        // An output to be appended.
76        LayoutPieces* pieces) {               // An output of layout pieces. Maybe null.
77    if (!run.getRange().contains(contextRange) || !contextRange.contains(hyphenationTargetRange)) {
78        return;
79    }
80
81    const std::vector<HyphenationType> hyphenResult =
82            hyphenate(textBuf.substr(hyphenationTargetRange), hyphenator);
83    for (uint32_t i = hyphenationTargetRange.getStart(); i < hyphenationTargetRange.getEnd(); ++i) {
84        const HyphenationType hyph = hyphenResult[hyphenationTargetRange.toRangeOffset(i)];
85        if (hyph == HyphenationType::DONT_BREAK) {
86            continue;  // Not a hyphenation point.
87        }
88
89        auto hyphenPart = contextRange.split(i);
90        U16StringPiece firstText = textBuf.substr(hyphenPart.first);
91        U16StringPiece secondText = textBuf.substr(hyphenPart.second);
92        const float first = run.measureHyphenPiece(firstText, Range(0, firstText.size()),
93                                                   StartHyphenEdit::NO_EDIT /* start hyphen edit */,
94                                                   editForThisLine(hyph) /* end hyphen edit */,
95                                                   nullptr /* advances */, pieces);
96        const float second = run.measureHyphenPiece(secondText, Range(0, secondText.size()),
97                                                    editForNextLine(hyph) /* start hyphen edit */,
98                                                    EndHyphenEdit::NO_EDIT /* end hyphen edit */,
99                                                    nullptr /* advances */, pieces);
100
101        out->emplace_back(i, hyph, first, second);
102    }
103}
104
105// Processes and retrieve informations from characters in the paragraph.
106struct CharProcessor {
107    // The number of spaces.
108    uint32_t rawSpaceCount = 0;
109
110    // The number of spaces minus trailing spaces.
111    uint32_t effectiveSpaceCount = 0;
112
113    // The sum of character width from the paragraph start.
114    ParaWidth sumOfCharWidths = 0.0;
115
116    // The sum of character width from the paragraph start minus trailing line end spaces.
117    // This means that the line width from the paragraph start if we decided break now.
118    ParaWidth effectiveWidth = 0.0;
119
120    // The total amount of character widths at the previous word break point.
121    ParaWidth sumOfCharWidthsAtPrevWordBreak = 0.0;
122
123    // The next word break offset.
124    uint32_t nextWordBreak = 0;
125
126    // The previous word break offset.
127    uint32_t prevWordBreak = 0;
128
129    // The width of a space. May be 0 if there are no spaces.
130    // Note: if there are multiple different widths for spaces (for example, because of mixing of
131    // fonts), it's only guaranteed to pick one.
132    float spaceWidth = 0.0f;
133
134    // The current hyphenator.
135    const Hyphenator* hyphenator = nullptr;
136
137    // Retrieve the current word range.
138    inline Range wordRange() const { return breaker.wordRange(); }
139
140    // Retrieve the current context range.
141    inline Range contextRange() const { return Range(prevWordBreak, nextWordBreak); }
142
143    // Returns the width from the last word break point.
144    inline ParaWidth widthFromLastWordBreak() const {
145        return effectiveWidth - sumOfCharWidthsAtPrevWordBreak;
146    }
147
148    // Returns the break penalty for the current word break point.
149    inline int wordBreakPenalty() const { return breaker.breakBadness(); }
150
151    CharProcessor(const U16StringPiece& text) { breaker.setText(text.data(), text.size()); }
152
153    // The user of CharProcessor must call updateLocaleIfNecessary with valid locale at least one
154    // time before feeding characters.
155    void updateLocaleIfNecessary(const Run& run) {
156        // Update locale if necessary.
157        uint32_t newLocaleListId = run.getLocaleListId();
158        if (localeListId != newLocaleListId) {
159            Locale locale = getEffectiveLocale(newLocaleListId);
160            nextWordBreak = breaker.followingWithLocale(locale, run.getRange().getStart());
161            hyphenator = HyphenatorMap::lookup(locale);
162            localeListId = newLocaleListId;
163        }
164    }
165
166    // Process one character.
167    void feedChar(uint32_t idx, uint16_t c, float w) {
168        if (idx == nextWordBreak) {
169            prevWordBreak = nextWordBreak;
170            nextWordBreak = breaker.next();
171            sumOfCharWidthsAtPrevWordBreak = sumOfCharWidths;
172        }
173        if (isWordSpace(c)) {
174            rawSpaceCount += 1;
175            spaceWidth = w;
176        }
177        sumOfCharWidths += w;
178        if (isLineEndSpace(c)) {
179            // If we break a line on a line-ending space, that space goes away. So postBreak
180            // and postSpaceCount, which keep the width and number of spaces if we decide to
181            // break at this point, don't need to get adjusted.
182        } else {
183            effectiveSpaceCount = rawSpaceCount;
184            effectiveWidth = sumOfCharWidths;
185        }
186    }
187
188private:
189    // The current locale list id.
190    uint32_t localeListId = LocaleListCache::kInvalidListId;
191
192    WordBreaker breaker;
193};
194}  // namespace minikin
195
196#endif  // MINIKIN_LINE_BREAKER_UTIL_H
197