1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef MINIKIN_LINE_BREAKER_UTIL_H 18#define MINIKIN_LINE_BREAKER_UTIL_H 19 20#include <vector> 21 22#include "minikin/Hyphenator.h" 23#include "minikin/MeasuredText.h" 24#include "minikin/U16StringPiece.h" 25 26#include "HyphenatorMap.h" 27#include "LayoutUtils.h" 28#include "Locale.h" 29#include "LocaleListCache.h" 30#include "MinikinInternal.h" 31#include "WordBreaker.h" 32 33namespace minikin { 34 35// ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for very large 36// paragraphs, accuracy could degrade using only 32-bit float. Note however that float is used 37// extensively on the Java side for this. This is a typedef so that we can easily change it based 38// on performance/accuracy tradeoff. 39typedef double ParaWidth; 40 41// Hyphenates a string potentially containing non-breaking spaces. 42std::vector<HyphenationType> hyphenate(const U16StringPiece& string, const Hyphenator& hypenator); 43 44// This function determines whether a character is a space that disappears at end of line. 45// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], plus '\n'. 46// Note: all such characters are in the BMP, so it's ok to use code units for this. 47inline bool isLineEndSpace(uint16_t c) { 48 return c == '\n' || c == ' ' // SPACE 49 || c == 0x1680 // OGHAM SPACE MARK 50 || (0x2000 <= c && c <= 0x200A && c != 0x2007) // EN QUAD, EM QUAD, EN SPACE, EM SPACE, 51 // THREE-PER-EM SPACE, FOUR-PER-EM SPACE, 52 // SIX-PER-EM SPACE, PUNCTUATION SPACE, 53 // THIN SPACE, HAIR SPACE 54 || c == 0x205F // MEDIUM MATHEMATICAL SPACE 55 || c == 0x3000; 56} 57 58// Returns true if the character needs to be excluded for the line spacing. 59inline bool isLineSpaceExcludeChar(uint16_t c) { 60 return c == CHAR_LINE_FEED || c == CHAR_CARRIAGE_RETURN; 61} 62 63inline Locale getEffectiveLocale(uint32_t localeListId) { 64 const LocaleList& localeList = LocaleListCache::getById(localeListId); 65 return localeList.empty() ? Locale() : localeList[0]; 66} 67 68// Retrieves hyphenation break points from a word. 69inline void populateHyphenationPoints( 70 const U16StringPiece& textBuf, // A text buffer. 71 const Run& run, // A run of this region. 72 const Hyphenator& hyphenator, // A hyphenator to be used for hyphenation. 73 const Range& contextRange, // A context range for measuring hyphenated piece. 74 const Range& hyphenationTargetRange, // An actual range for the hyphenation target. 75 std::vector<HyphenBreak>* out, // An output to be appended. 76 LayoutPieces* pieces) { // An output of layout pieces. Maybe null. 77 if (!run.getRange().contains(contextRange) || !contextRange.contains(hyphenationTargetRange)) { 78 return; 79 } 80 81 const std::vector<HyphenationType> hyphenResult = 82 hyphenate(textBuf.substr(hyphenationTargetRange), hyphenator); 83 for (uint32_t i = hyphenationTargetRange.getStart(); i < hyphenationTargetRange.getEnd(); ++i) { 84 const HyphenationType hyph = hyphenResult[hyphenationTargetRange.toRangeOffset(i)]; 85 if (hyph == HyphenationType::DONT_BREAK) { 86 continue; // Not a hyphenation point. 87 } 88 89 auto hyphenPart = contextRange.split(i); 90 U16StringPiece firstText = textBuf.substr(hyphenPart.first); 91 U16StringPiece secondText = textBuf.substr(hyphenPart.second); 92 const float first = run.measureHyphenPiece(firstText, Range(0, firstText.size()), 93 StartHyphenEdit::NO_EDIT /* start hyphen edit */, 94 editForThisLine(hyph) /* end hyphen edit */, 95 nullptr /* advances */, pieces); 96 const float second = run.measureHyphenPiece(secondText, Range(0, secondText.size()), 97 editForNextLine(hyph) /* start hyphen edit */, 98 EndHyphenEdit::NO_EDIT /* end hyphen edit */, 99 nullptr /* advances */, pieces); 100 101 out->emplace_back(i, hyph, first, second); 102 } 103} 104 105// Processes and retrieve informations from characters in the paragraph. 106struct CharProcessor { 107 // The number of spaces. 108 uint32_t rawSpaceCount = 0; 109 110 // The number of spaces minus trailing spaces. 111 uint32_t effectiveSpaceCount = 0; 112 113 // The sum of character width from the paragraph start. 114 ParaWidth sumOfCharWidths = 0.0; 115 116 // The sum of character width from the paragraph start minus trailing line end spaces. 117 // This means that the line width from the paragraph start if we decided break now. 118 ParaWidth effectiveWidth = 0.0; 119 120 // The total amount of character widths at the previous word break point. 121 ParaWidth sumOfCharWidthsAtPrevWordBreak = 0.0; 122 123 // The next word break offset. 124 uint32_t nextWordBreak = 0; 125 126 // The previous word break offset. 127 uint32_t prevWordBreak = 0; 128 129 // The width of a space. May be 0 if there are no spaces. 130 // Note: if there are multiple different widths for spaces (for example, because of mixing of 131 // fonts), it's only guaranteed to pick one. 132 float spaceWidth = 0.0f; 133 134 // The current hyphenator. 135 const Hyphenator* hyphenator = nullptr; 136 137 // Retrieve the current word range. 138 inline Range wordRange() const { return breaker.wordRange(); } 139 140 // Retrieve the current context range. 141 inline Range contextRange() const { return Range(prevWordBreak, nextWordBreak); } 142 143 // Returns the width from the last word break point. 144 inline ParaWidth widthFromLastWordBreak() const { 145 return effectiveWidth - sumOfCharWidthsAtPrevWordBreak; 146 } 147 148 // Returns the break penalty for the current word break point. 149 inline int wordBreakPenalty() const { return breaker.breakBadness(); } 150 151 CharProcessor(const U16StringPiece& text) { breaker.setText(text.data(), text.size()); } 152 153 // The user of CharProcessor must call updateLocaleIfNecessary with valid locale at least one 154 // time before feeding characters. 155 void updateLocaleIfNecessary(const Run& run) { 156 // Update locale if necessary. 157 uint32_t newLocaleListId = run.getLocaleListId(); 158 if (localeListId != newLocaleListId) { 159 Locale locale = getEffectiveLocale(newLocaleListId); 160 nextWordBreak = breaker.followingWithLocale(locale, run.getRange().getStart()); 161 hyphenator = HyphenatorMap::lookup(locale); 162 localeListId = newLocaleListId; 163 } 164 } 165 166 // Process one character. 167 void feedChar(uint32_t idx, uint16_t c, float w) { 168 if (idx == nextWordBreak) { 169 prevWordBreak = nextWordBreak; 170 nextWordBreak = breaker.next(); 171 sumOfCharWidthsAtPrevWordBreak = sumOfCharWidths; 172 } 173 if (isWordSpace(c)) { 174 rawSpaceCount += 1; 175 spaceWidth = w; 176 } 177 sumOfCharWidths += w; 178 if (isLineEndSpace(c)) { 179 // If we break a line on a line-ending space, that space goes away. So postBreak 180 // and postSpaceCount, which keep the width and number of spaces if we decide to 181 // break at this point, don't need to get adjusted. 182 } else { 183 effectiveSpaceCount = rawSpaceCount; 184 effectiveWidth = sumOfCharWidths; 185 } 186 } 187 188private: 189 // The current locale list id. 190 uint32_t localeListId = LocaleListCache::kInvalidListId; 191 192 WordBreaker breaker; 193}; 194} // namespace minikin 195 196#endif // MINIKIN_LINE_BREAKER_UTIL_H 197