LineBreaker.cpp revision 0948fbb63636111c193365e01dbe952defd700f3
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "LineBreakerImpl.h" 18 19#include <algorithm> 20#include <limits> 21 22#include "minikin/Characters.h" 23#include "minikin/Layout.h" 24#include "minikin/Range.h" 25#include "minikin/U16StringPiece.h" 26 27#include "HyphenatorMap.h" 28#include "LayoutUtils.h" 29#include "Locale.h" 30#include "LocaleListCache.h" 31#include "MinikinInternal.h" 32#include "WordBreaker.h" 33 34namespace minikin { 35 36// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these 37// constants are larger than any reasonable actual width score. 38const float SCORE_INFTY = std::numeric_limits<float>::max(); 39const float SCORE_OVERFULL = 1e12f; 40const float SCORE_DESPERATE = 1e10f; 41 42// Multiplier for hyphen penalty on last line. 43const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f; 44// Penalty assigned to each line break (to try to minimize number of lines) 45// TODO: when we implement full justification (so spaces can shrink and stretch), this is 46// probably not the most appropriate method. 47const float LINE_PENALTY_MULTIPLIER = 2.0f; 48 49// Penalty assigned to shrinking the whitepsace. 50const float SHRINK_PENALTY_MULTIPLIER = 4.0f; 51 52// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for 53// unreasonably long words. This is somewhat of a heuristic because extremely long words 54// are possible in some languages. This does mean that very long real words can get 55// broken by desperate breaks, with no hyphens. 56const size_t LONGEST_HYPHENATED_WORD = 45; 57 58// Maximum amount that spaces can shrink, in justified text. 59const float SHRINKABILITY = 1.0 / 3.0; 60 61constexpr size_t LAST_BREAK_OFFSET_NOWHERE = SIZE_MAX; 62constexpr size_t LAST_BREAK_OFFSET_DESPERATE = LAST_BREAK_OFFSET_NOWHERE - 1; 63 64inline static const LocaleList& getLocaleList(uint32_t localeListId) { 65 android::AutoMutex _l(gMinikinLock); 66 return LocaleListCache::getById(localeListId); 67} 68 69LineBreakerImpl::LineBreakerImpl(const U16StringPiece& textBuffer, BreakStrategy strategy, 70 HyphenationFrequency frequency, bool justified) 71 : LineBreakerImpl(std::make_unique<WordBreaker>(), textBuffer, strategy, frequency, 72 justified) {} 73 74LineBreakerImpl::LineBreakerImpl(std::unique_ptr<WordBreaker>&& breaker, 75 const U16StringPiece& textBuffer, BreakStrategy strategy, 76 HyphenationFrequency frequency, bool justified) 77 : mCurrentLocaleListId(LocaleListCache::kInvalidListId), 78 mWordBreaker(std::move(breaker)), 79 mTextBuf(textBuffer), 80 mStrategy(strategy), 81 mHyphenationFrequency(frequency), 82 mJustified(justified), 83 mLastConsideredGreedyCandidate(LAST_BREAK_OFFSET_NOWHERE), 84 mSpaceCount(0) { 85 mWordBreaker->setText(mTextBuf.data(), mTextBuf.size()); 86 87 // handle initial break here because addStyleRun may never be called 88 mCandidates.push_back({0 /* offset */, 0.0 /* preBreak */, 0.0 /* postBreak */, 89 0.0 /* firstOverhang */, 0.0 /* secondOverhang */, 0.0 /* penalty */, 90 0 /* preSpaceCount */, 0 /* postSpaceCount */, 91 HyphenationType::DONT_BREAK /* hyphenType */, 92 false /* isRtl. TODO: may need to be based on input. */}); 93} 94 95LineBreakerImpl::~LineBreakerImpl() {} 96 97const LineBreakerImpl::Candidate& LineBreakerImpl::getLastBreakCandidate() const { 98 MINIKIN_ASSERT(mLastGreedyBreakIndex != LAST_BREAK_OFFSET_NOWHERE, 99 "Line break hasn't started."); 100 return mLastGreedyBreakIndex == LAST_BREAK_OFFSET_DESPERATE 101 ? mFakeDesperateCandidate 102 : mCandidates[mLastGreedyBreakIndex]; 103} 104 105void LineBreakerImpl::setLocaleList(uint32_t localeListId, size_t restartFrom) { 106 if (mCurrentLocaleListId == localeListId) { 107 return; 108 } 109 110 const LocaleList& newLocales = getLocaleList(localeListId); 111 const Locale newLocale = newLocales.empty() ? Locale() : newLocales[0]; 112 const uint64_t newLocaleId = newLocale.getIdentifier(); 113 114 const bool needUpdate = 115 // The first time setLocale is called. 116 mCurrentLocaleListId == LocaleListCache::kInvalidListId || 117 // The effective locale is changed. 118 newLocaleId != mCurrentLocaleId; 119 120 // For now, we ignore all locales except the first valid one. 121 // TODO: Support selecting the locale based on the script of the text. 122 mCurrentLocaleListId = localeListId; 123 mCurrentLocaleId = newLocaleId; 124 if (needUpdate) { 125 mWordBreaker->followingWithLocale(newLocale, restartFrom); 126 mHyphenator = HyphenatorMap::lookup(newLocale); 127 } 128} 129 130// This function determines whether a character is a space that disappears at end of line. 131// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], 132// plus '\n'. 133// Note: all such characters are in the BMP, so it's ok to use code units for this. 134static bool isLineEndSpace(uint16_t c) { 135 return c == '\n' || c == ' ' // SPACE 136 || c == 0x1680 // OGHAM SPACE MARK 137 || (0x2000 <= c && c <= 0x200A && c != 0x2007) // EN QUAD, EM QUAD, EN SPACE, EM SPACE, 138 // THREE-PER-EM SPACE, FOUR-PER-EM SPACE, 139 // SIX-PER-EM SPACE, PUNCTUATION SPACE, 140 // THIN SPACE, HAIR SPACE 141 || c == 0x205F // MEDIUM MATHEMATICAL SPACE 142 || c == 0x3000; // IDEOGRAPHIC SPACE 143} 144 145// Hyphenates a string potentially containing non-breaking spaces. 146std::vector<HyphenationType> LineBreakerImpl::hyphenate(const U16StringPiece& str) { 147 std::vector<HyphenationType> out; 148 const size_t len = str.size(); 149 out.reserve(len); 150 151 // A word here is any consecutive string of non-NBSP characters. 152 bool inWord = false; 153 size_t wordStart = 0; // The initial value will never be accessed, but just in case. 154 for (size_t i = 0; i <= len; i++) { 155 if (i == len || str[i] == CHAR_NBSP) { 156 if (inWord) { 157 // A word just ended. Hyphenate it. 158 const size_t wordLen = i - wordStart; 159 if (wordLen <= LONGEST_HYPHENATED_WORD) { 160 if (wordStart == 0) { 161 // The string starts with a word. Use out directly. 162 mHyphenator->hyphenate(&out, str.data(), wordLen); 163 } else { 164 std::vector<HyphenationType> wordVec; 165 mHyphenator->hyphenate(&wordVec, str.data() + wordStart, wordLen); 166 out.insert(out.end(), wordVec.cbegin(), wordVec.cend()); 167 } 168 } else { // Word is too long. Inefficient to hyphenate. 169 out.insert(out.end(), wordLen, HyphenationType::DONT_BREAK); 170 } 171 inWord = false; 172 } 173 if (i < len) { 174 // Insert one DONT_BREAK for the NBSP. 175 out.push_back(HyphenationType::DONT_BREAK); 176 } 177 } else if (!inWord) { 178 inWord = true; 179 wordStart = i; 180 } 181 } 182 return out; 183} 184 185// Compute the total overhang of text based on per-cluster advances and overhangs. 186// The two input vectors are expected to be of the same size. 187/* static */ LayoutOverhang LineBreakerImpl::computeOverhang( 188 float totalAdvance, const std::vector<float>& advances, 189 const std::vector<LayoutOverhang>& overhangs, bool isRtl) { 190 ParaWidth left = 0.0; 191 ParaWidth right = 0.0; 192 ParaWidth seenAdvance = 0.0; 193 const size_t len = advances.size(); 194 if (isRtl) { 195 for (size_t i = 0; i < len; i++) { 196 right = std::max(right, overhangs[i].right - seenAdvance); 197 seenAdvance += advances[i]; 198 left = std::max(left, overhangs[i].left - (totalAdvance - seenAdvance)); 199 } 200 } else { 201 for (size_t i = 0; i < len; i++) { 202 left = std::max(left, overhangs[i].left - seenAdvance); 203 seenAdvance += advances[i]; 204 right = std::max(right, overhangs[i].right - (totalAdvance - seenAdvance)); 205 } 206 } 207 return {static_cast<float>(left), static_cast<float>(right)}; 208} 209 210// This adds all the hyphenation candidates for a given word by first finding all the hyphenation 211// points and then calling addWordBreak for each. 212// 213// paint will be used for measuring the text and paint must not be null. 214// wordRange is the range for the word. 215// contextRange is the range from the last word breakpoint to the first code unit after the word. 216// For example, if the word starts with the punctuations or ends with spaces, the contextRange 217// contains both punctuations and trailing spaces but wordRange excludes them. 218// lastBreakWidth is the width seen until the begining of context range. 219// bidiFlags keep the bidi flags to determine the direction of text for layout and other 220// calculations. It may only be Bidi::FORCE_RTL or Bidi::FORCE_LTR. 221// 222// The following parameters are needed to be passed to addWordBreak: 223// postBreak is the width that would be seen if we decide to break at the end of the word (so it 224// doesn't count any line-ending space after the word). 225// postSpaceCount is the number of spaces that would be seen if we decide to break at the end of the 226// word (and like postBreak it doesn't count any line-ending space after the word). 227// hyphenPenalty is the amount of penalty for hyphenation. 228void LineBreakerImpl::addHyphenationCandidates(const Run& run, const Range& contextRange, 229 const Range& wordRange, ParaWidth lastBreakWidth, 230 ParaWidth postBreak, size_t postSpaceCount, 231 float hyphenPenalty) { 232 MINIKIN_ASSERT(contextRange.contains(wordRange), "Context must contain word range"); 233 234 const bool isRtlWord = run.isRtl(); 235 const std::vector<HyphenationType> hyphenResult = hyphenate(mTextBuf.substr(wordRange)); 236 237 std::vector<float> advances; 238 std::vector<LayoutOverhang> overhangs; 239 advances.reserve(contextRange.getLength()); 240 overhangs.reserve(contextRange.getLength()); 241 // measure hyphenated substrings 242 for (size_t j : wordRange) { 243 HyphenationType hyph = hyphenResult[wordRange.toRangeOffset(j)]; 244 if (hyph == HyphenationType::DONT_BREAK) { 245 continue; 246 } 247 248 auto hyphenPart = contextRange.split(j); 249 const Range& firstPart = hyphenPart.first; 250 const Range& secondPart = hyphenPart.second; 251 252 const size_t firstPartLen = firstPart.getLength(); 253 advances.resize(firstPartLen); 254 overhangs.resize(firstPartLen); 255 const float firstPartWidth = 256 run.measureHyphenPiece(mTextBuf, firstPart, StartHyphenEdit::NO_EDIT, 257 editForThisLine(hyph), advances.data(), overhangs.data()); 258 const ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth; 259 LayoutOverhang overhang = computeOverhang(firstPartWidth, advances, overhangs, isRtlWord); 260 // TODO: This ignores potential overhang from a previous word, e.g. in "R table" if the 261 // right overhang of the R is larger than the advance of " ta-". In such cases, we need to 262 // take the existing overhang into account. 263 const float firstOverhang = isRtlWord ? overhang.left : overhang.right; 264 265 const size_t secondPartLen = secondPart.getLength(); 266 advances.resize(secondPartLen); 267 overhangs.resize(secondPartLen); 268 const float secondPartWidth = 269 run.measureHyphenPiece(mTextBuf, secondPart, editForNextLine(hyph), 270 EndHyphenEdit::NO_EDIT, advances.data(), overhangs.data()); 271 // hyphPreBreak is calculated like this so that when the line width for a future line break 272 // is being calculated, the width of the whole word would be subtracted and the width of the 273 // second part would be added. 274 const ParaWidth hyphPreBreak = postBreak - secondPartWidth; 275 overhang = computeOverhang(secondPartWidth, advances, overhangs, isRtlWord); 276 const float secondOverhang = isRtlWord ? overhang.right : overhang.left; 277 278 addWordBreak(j, hyphPreBreak, hyphPostBreak, firstOverhang, secondOverhang, postSpaceCount, 279 postSpaceCount, hyphenPenalty, hyph, isRtlWord); 280 } 281} 282 283// This method finds the candidate word breaks (using the ICU break iterator) and sends them 284// to addWordBreak. 285void LineBreakerImpl::addRuns(const MeasuredText& measured, const LineWidth& lineWidth, 286 const TabStops& tabStops) { 287 for (const auto& run : measured.runs) { 288 const bool isRtl = run->isRtl(); 289 const Range& range = run->getRange(); 290 291 const bool canHyphenate = run->canHyphenate(); 292 float hyphenPenalty = 0.0; 293 if (canHyphenate) { 294 const MinikinPaint* paint = run->getPaint(); 295 // a heuristic that seems to perform well 296 hyphenPenalty = 0.5 * paint->size * paint->scaleX * lineWidth.getAt(0); 297 if (mHyphenationFrequency == HyphenationFrequency::Normal) { 298 hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing 299 } 300 301 if (mJustified) { 302 // Make hyphenation more aggressive for fully justified text (so that "normal" in 303 // justified mode is the same as "full" in ragged-right). 304 hyphenPenalty *= 0.25; 305 } else { 306 // Line penalty is zero for justified text. 307 mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); 308 } 309 } 310 311 setLocaleList(run->getLocaleListId(), range.getStart()); 312 size_t current = (size_t)mWordBreaker->current(); 313 // This will keep the index of last code unit seen that's not a line-ending space, plus one. 314 // In other words, the index of the first code unit after a word. 315 316 Range hyphenationContextRange(range.getStart(), range.getStart()); 317 ParaWidth lastBreakWidth = mWidth; // The width of the text as of the previous break point. 318 ParaWidth postBreak = mWidth; // The width of text seen if we decide to break here 319 // postBreak plus potential forward overhang. Guaranteed to be >= postBreak. 320 ParaWidth postBreakWithOverhang = mWidth; 321 // The maximum amount of backward overhang seen since last word. 322 float maxBackwardOverhang = 0; 323 size_t postSpaceCount = mSpaceCount; 324 const bool hyphenate = canHyphenate && mHyphenationFrequency != HyphenationFrequency::None; 325 for (size_t i : range) { 326 const uint16_t c = mTextBuf[i]; 327 if (c == CHAR_TAB) { 328 // Fall back to greedy; other modes don't know how to deal with tabs. 329 mStrategy = BreakStrategy::Greedy; 330 // In order to figure out the actual width of the tab, we need to run the greedy 331 // algorithm on all previous text and determine the last line break's preBreak. 332 const ParaWidth lastPreBreak = computeBreaksGreedyPartial(measured, lineWidth); 333 mWidth = lastPreBreak + tabStops.nextTab(mWidth - lastPreBreak); 334 if (mFirstTabIndex == INT_MAX) { 335 mFirstTabIndex = static_cast<int>(i); 336 } 337 // No need to update afterWord since tab characters can not be an end of word 338 // character in WordBreaker. See the implementation of WordBreaker::wordEnd. 339 } else { 340 if (isWordSpace(c)) { 341 mSpaceCount += 1; 342 } 343 mWidth += measured.widths[i]; 344 if (isLineEndSpace(c)) { 345 // If we break a line on a line-ending space, that space goes away. So postBreak 346 // and postSpaceCount, which keep the width and number of spaces if we decide to 347 // break at this point, don't need to get adjusted. 348 // 349 // TODO: handle the rare case of line ending spaces having overhang (it can 350 // happen for U+1680 OGHAM SPACE MARK). 351 } else { 352 postBreak = mWidth; 353 postSpaceCount = mSpaceCount; 354 hyphenationContextRange.setEnd(i + 1); 355 356 // TODO: This doesn't work for very tight lines and large overhangs, where the 357 // overhang from a previous word that may end up on an earline line may be 358 // considered still in effect for a later word. But that's expected to be very 359 // rare, so we ignore it for now. 360 const float forwardOverhang = 361 isRtl ? measured.overhangs[i].left : measured.overhangs[i].right; 362 postBreakWithOverhang = 363 std::max(postBreakWithOverhang, postBreak + forwardOverhang); 364 365 float backwardOverhang = 366 isRtl ? measured.overhangs[i].right : measured.overhangs[i].left; 367 // Adjust backwardOverhang by the advance already seen from the last break. 368 backwardOverhang -= (mWidth - measured.widths[i]) - lastBreakWidth; 369 maxBackwardOverhang = std::max(maxBackwardOverhang, backwardOverhang); 370 } 371 } 372 if (i + 1 == current) { // We are at the end of a word. 373 // We skip breaks for zero-width characters inside replacement spans. 374 const bool addBreak = 375 canHyphenate || current == range.getEnd() || measured.widths[current] > 0; 376 377 if (addBreak) { 378 // adjust second overhang for previous breaks 379 adjustSecondOverhang(maxBackwardOverhang); 380 } 381 if (hyphenate) { 382 const Range wordRange = mWordBreaker->wordRange(); 383 if (!wordRange.isEmpty() && range.contains(wordRange)) { 384 addHyphenationCandidates(*run, hyphenationContextRange, wordRange, 385 lastBreakWidth, postBreak, postSpaceCount, 386 hyphenPenalty); 387 } 388 } 389 if (addBreak) { 390 const float penalty = hyphenPenalty * mWordBreaker->breakBadness(); 391 // TODO: overhangs may need adjustment at bidi boundaries. 392 addWordBreak(current, mWidth /* preBreak */, postBreak, 393 postBreakWithOverhang - postBreak /* firstOverhang */, 394 0.0 /* secondOverhang, to be adjusted later */, mSpaceCount, 395 postSpaceCount, penalty, HyphenationType::DONT_BREAK, isRtl); 396 } 397 hyphenationContextRange = Range(current, current); 398 lastBreakWidth = mWidth; 399 maxBackwardOverhang = 0; 400 current = (size_t)mWordBreaker->next(); 401 } 402 } 403 } 404} 405 406// Add desperate breaks for the greedy algorithm. 407// Note: these breaks are based on the shaping of the (non-broken) original text; they 408// are imprecise especially in the presence of kerning, ligatures, overhangs, and Arabic shaping. 409void LineBreakerImpl::addDesperateBreaksGreedy(const MeasuredText& measured, 410 ParaWidth existingPreBreak, size_t start, size_t end, 411 const LineWidth& lineWidth) { 412 ParaWidth width = measured.widths[start]; 413 for (size_t i = start + 1; i < end; i++) { 414 const float w = measured.widths[i]; 415 if (w > 0) { // Add desperate breaks only before grapheme clusters. 416 const ParaWidth newWidth = width + w; 417 if (!fitsOnCurrentLine(newWidth, 0.0, 0.0, lineWidth)) { 418 const Candidate& lastGreedyBreak = getLastBreakCandidate(); 419 constexpr HyphenationType hyphen = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; 420 pushBreak(i, width, computeMaxExtent(measured, lastGreedyBreak.offset, i), 421 packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType), 422 editForThisLine(hyphen))); 423 424 existingPreBreak += width; 425 // Only set the fields that will be later read. 426 mFakeDesperateCandidate.offset = i; 427 mFakeDesperateCandidate.preBreak = existingPreBreak; 428 mFakeDesperateCandidate.secondOverhang = 0.0; 429 mFakeDesperateCandidate.hyphenType = hyphen; 430 mLastGreedyBreakIndex = LAST_BREAK_OFFSET_DESPERATE; 431 432 width = w; 433 } else { 434 width = newWidth; 435 } 436 } 437 } 438} 439 440// Add a word break (possibly for a hyphenated fragment). 441inline void LineBreakerImpl::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, 442 float firstOverhang, float secondOverhang, 443 size_t preSpaceCount, size_t postSpaceCount, 444 float penalty, HyphenationType hyph, bool isRtl) { 445 mCandidates.push_back({offset, preBreak, postBreak, firstOverhang, secondOverhang, penalty, 446 preSpaceCount, postSpaceCount, hyph, isRtl}); 447} 448 449// Go back and adjust earlier line breaks if needed. 450void LineBreakerImpl::adjustSecondOverhang(float secondOverhang) { 451 const size_t lastCand = mCandidates.size() - 1; 452 const ParaWidth lastPreBreak = mCandidates[lastCand].preBreak; 453 for (ssize_t i = lastCand; i >= 0; i--) { 454 // "lastPreBreak - mCandidates[i].preBreak" is the amount of difference in mWidth when those 455 // breaks where added. So by subtracting that difference, we are subtracting the difference 456 // in advances in order to find out how much overhang still remains. 457 const float remainingOverhang = secondOverhang - (lastPreBreak - mCandidates[i].preBreak); 458 if (remainingOverhang <= 0.0) { 459 // No more remaining overhang. We don't need to adjust anything anymore. 460 return; 461 } 462 mCandidates[i].secondOverhang = std::max(mCandidates[i].secondOverhang, remainingOverhang); 463 } 464} 465 466// Find the needed extent between the start and end ranges. start is inclusive and end is exclusive. 467// Both are indices of the source string. 468MinikinExtent LineBreakerImpl::computeMaxExtent(const MeasuredText& measured, size_t start, 469 size_t end) const { 470 MinikinExtent res = {0.0, 0.0, 0.0}; 471 for (size_t j = start; j < end; j++) { 472 res.extendBy(measured.extents[j]); 473 } 474 return res; 475} 476 477void LineBreakerImpl::addGreedyBreak(const MeasuredText& measured, size_t breakIndex) { 478 const Candidate& candidate = mCandidates[breakIndex]; 479 const Candidate& lastGreedyBreak = getLastBreakCandidate(); 480 pushBreak(candidate.offset, candidate.postBreak - lastGreedyBreak.preBreak, 481 computeMaxExtent(measured, lastGreedyBreak.offset, candidate.offset), 482 packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType), 483 editForThisLine(candidate.hyphenType))); 484 mLastGreedyBreakIndex = breakIndex; 485} 486 487// Also add desperate breaks if needed (ie when word exceeds current line width). 488void LineBreakerImpl::considerGreedyBreakCandidate(const MeasuredText& measured, size_t candIndex, 489 const LineWidth& lineWidth) { 490 const Candidate* cand = &mCandidates[candIndex]; 491 const Candidate* lastGreedyBreak = &getLastBreakCandidate(); 492 float leftOverhang, rightOverhang; 493 // TODO: Only works correctly for unidirectional text. Needs changes for bidi text. 494 if (cand->isRtl) { 495 leftOverhang = cand->firstOverhang; 496 rightOverhang = lastGreedyBreak->secondOverhang; 497 } else { 498 leftOverhang = lastGreedyBreak->secondOverhang; 499 rightOverhang = cand->firstOverhang; 500 } 501 while (!fitsOnCurrentLine(cand->postBreak - lastGreedyBreak->preBreak, leftOverhang, 502 rightOverhang, lineWidth)) { 503 // This break would create an overfull line, pick the best break and break there (greedy). 504 // We do this in a loop, since there's no guarantee that after a break the remaining text 505 // would fit on the next line. 506 507 if (mBestGreedyBreaks.empty()) { 508 // If no break has been found since last break but we are inside this loop, the 509 // section between the last line break and this candidate doesn't fit in the available 510 // space. So we need to consider desperate breaks. 511 512 // Add desperate breaks starting immediately after the last break. 513 addDesperateBreaksGreedy(measured, lastGreedyBreak->preBreak, lastGreedyBreak->offset, 514 cand->offset, lineWidth); 515 break; 516 } else { 517 // Break at the best known break. 518 addGreedyBreak(measured, popBestGreedyBreak()); 519 520 // addGreedyBreak updates the last break candidate. 521 lastGreedyBreak = &getLastBreakCandidate(); 522 if (cand->isRtl) { 523 rightOverhang = lastGreedyBreak->secondOverhang; 524 } else { 525 leftOverhang = lastGreedyBreak->secondOverhang; 526 } 527 } 528 } 529 insertGreedyBreakCandidate(candIndex, cand->penalty); 530} 531 532void LineBreakerImpl::pushBreak(int offset, float width, MinikinExtent extent, 533 HyphenEdit hyphenEdit) { 534 mBreaks.push_back(offset); 535 mWidths.push_back(width); 536 mAscents.push_back(extent.ascent); 537 mDescents.push_back(extent.descent); 538 const int flags = ((mFirstTabIndex < mBreaks.back()) << kTab_Shift) | hyphenEdit; 539 mFlags.push_back(flags); 540 mFirstTabIndex = INT_MAX; 541} 542 543LineBreakerImpl::ParaWidth LineBreakerImpl::computeBreaksGreedyPartial(const MeasuredText& measured, 544 const LineWidth& lineWidth) { 545 size_t firstCandidate; 546 if (mLastConsideredGreedyCandidate == SIZE_MAX) { 547 // Clear results and reset greedy line breaker state if we are here for the first time. 548 clearResults(); 549 mBestGreedyBreaks.clear(); 550 mLastGreedyBreakIndex = 0; 551 mFirstTabIndex = INT_MAX; 552 firstCandidate = 1; 553 } else { 554 firstCandidate = mLastConsideredGreedyCandidate + 1; 555 } 556 557 const size_t lastCandidate = mCandidates.size() - 1; 558 for (size_t cand = firstCandidate; cand <= lastCandidate; cand++) { 559 considerGreedyBreakCandidate(measured, cand, lineWidth); 560 } 561 mLastConsideredGreedyCandidate = lastCandidate; 562 return getLastBreakCandidate().preBreak; 563} 564 565// Get the width of a space. May return 0 if there are no spaces. 566// Note: if there are multiple different widths for spaces (for example, because of mixing of 567// fonts), it's only guaranteed to pick one. 568float LineBreakerImpl::getSpaceWidth(const MeasuredText& measured) const { 569 for (size_t i = 0; i < mTextBuf.size(); i++) { 570 if (isWordSpace(mTextBuf[i])) { 571 return measured.widths[i]; 572 } 573 } 574 return 0.0f; 575} 576 577bool LineBreakerImpl::fitsOnCurrentLine(float width, float leftOverhang, float rightOverhang, 578 const LineWidth& lineWidth) const { 579 const size_t lineNo = mBreaks.size(); 580 const float availableWidth = lineWidth.getAt(lineNo); 581 const float availableLeftPadding = lineWidth.getLeftPaddingAt(lineNo); 582 const float availableRightPadding = lineWidth.getRightPaddingAt(lineNo); 583 const float remainingLeftOverhang = std::max(0.0f, leftOverhang - availableLeftPadding); 584 const float remainingRightOverhang = std::max(0.0f, rightOverhang - availableRightPadding); 585 return width + remainingLeftOverhang + remainingRightOverhang <= availableWidth; 586} 587 588void LineBreakerImpl::computeBreaksGreedy(const MeasuredText& measured, 589 const LineWidth& lineWidth) { 590 computeBreaksGreedyPartial(measured, lineWidth); 591 // All breaks but the last have been added by computeBreaksGreedyPartial() already. 592 const Candidate* lastCandidate = &mCandidates.back(); 593 if (mCandidates.size() == 1 || mLastGreedyBreakIndex != (mCandidates.size() - 1)) { 594 const Candidate& lastGreedyBreak = getLastBreakCandidate(); 595 pushBreak(lastCandidate->offset, lastCandidate->postBreak - lastGreedyBreak.preBreak, 596 computeMaxExtent(measured, lastGreedyBreak.offset, lastCandidate->offset), 597 packHyphenEdit(editForNextLine(lastGreedyBreak.hyphenType), 598 EndHyphenEdit::NO_EDIT)); 599 // No need to update mLastGreedyBreakIndex because we're done. 600 } 601} 602 603// Add desperate breaks for the optimal algorithm. 604// Note: these breaks are based on the shaping of the (non-broken) original text; they 605// are imprecise especially in the presence of kerning, ligatures, overhangs, and Arabic shaping. 606void LineBreakerImpl::addDesperateBreaksOptimal(const MeasuredText& measured, 607 std::vector<Candidate>* out, 608 ParaWidth existingPreBreak, size_t postSpaceCount, 609 bool isRtl, size_t start, size_t end) { 610 ParaWidth width = existingPreBreak + measured.widths[start]; 611 for (size_t i = start + 1; i < end; i++) { 612 const float w = measured.widths[i]; 613 if (w > 0) { // Add desperate breaks only before grapheme clusters. 614 out->push_back({i /* offset */, width /* preBreak */, width /* postBreak */, 615 0.0 /* firstOverhang */, 0.0 /* secondOverhang */, 616 SCORE_DESPERATE /* penalty */, 617 // postSpaceCount doesn't include trailing spaces. 618 postSpaceCount /* preSpaceCount */, postSpaceCount /* postSpaceCount */, 619 HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN /* hyphenType */, 620 isRtl /* isRtl */}); 621 width += w; 622 } 623 } 624} 625 626void LineBreakerImpl::addAllDesperateBreaksOptimal(const MeasuredText& measured, 627 const LineWidth& lineWidth) { 628 const ParaWidth minLineWidth = lineWidth.getMin(); 629 size_t firstDesperateIndex = 0; 630 const size_t nCand = mCandidates.size(); 631 for (size_t i = 1; i < nCand; i++) { 632 const ParaWidth requiredWidth = mCandidates[i].postBreak - mCandidates[i - 1].preBreak; 633 if (requiredWidth > minLineWidth) { 634 // A desperate break is needed. 635 firstDesperateIndex = i; 636 break; 637 } 638 } 639 if (firstDesperateIndex == 0) { // No desperate breaks needed. 640 return; 641 } 642 643 // This temporary holds an expanded list of candidates, which will be later copied back into 644 // mCandidates. The beginning of mCandidates, where there are no desparate breaks, is skipped. 645 std::vector<Candidate> expandedCandidates; 646 const size_t nRemainingCandidates = nCand - firstDesperateIndex; 647 expandedCandidates.reserve(nRemainingCandidates + 1); // At least one more is needed. 648 for (size_t i = firstDesperateIndex; i < nCand; i++) { 649 const Candidate& previousCand = mCandidates[i - 1]; 650 const Candidate& thisCand = mCandidates[i]; 651 const ParaWidth requiredWidth = thisCand.postBreak - previousCand.preBreak; 652 if (requiredWidth > minLineWidth) { 653 addDesperateBreaksOptimal(measured, &expandedCandidates, previousCand.preBreak, 654 thisCand.postSpaceCount, thisCand.isRtl, 655 previousCand.offset /* start */, thisCand.offset /* end */); 656 } 657 expandedCandidates.push_back(thisCand); 658 } 659 660 mCandidates.reserve(firstDesperateIndex + expandedCandidates.size()); 661 // Iterator to the first candidate to insert from expandedCandidates. The candidates before this 662 // would simply be copied. 663 auto firstToInsert = expandedCandidates.begin() + nRemainingCandidates; 664 // Copy until the end of mCandidates. 665 std::copy(expandedCandidates.begin(), firstToInsert, mCandidates.begin() + firstDesperateIndex); 666 // Insert the rest. 667 mCandidates.insert(mCandidates.end(), firstToInsert, expandedCandidates.end()); 668} 669 670// Follow "prev" links in mCandidates array, and copy to result arrays. 671void LineBreakerImpl::finishBreaksOptimal(const MeasuredText& measured, 672 const std::vector<OptimalBreaksData>& breaksData) { 673 // clear output vectors. 674 clearResults(); 675 676 const size_t nCand = mCandidates.size(); 677 size_t prev; 678 for (size_t i = nCand - 1; i > 0; i = prev) { 679 prev = breaksData[i].prev; 680 mBreaks.push_back(mCandidates[i].offset); 681 mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); 682 MinikinExtent extent = 683 computeMaxExtent(measured, mCandidates[prev].offset, mCandidates[i].offset); 684 mAscents.push_back(extent.ascent); 685 mDescents.push_back(extent.descent); 686 687 const HyphenEdit edit = 688 packHyphenEdit(prev == 0 ? StartHyphenEdit::NO_EDIT 689 : editForNextLine(mCandidates[prev].hyphenType), 690 editForThisLine(mCandidates[i].hyphenType)); 691 mFlags.push_back(static_cast<int>(edit)); 692 } 693 std::reverse(mBreaks.begin(), mBreaks.end()); 694 std::reverse(mWidths.begin(), mWidths.end()); 695 std::reverse(mFlags.begin(), mFlags.end()); 696} 697 698void LineBreakerImpl::computeBreaksOptimal(const MeasuredText& measured, 699 const LineWidth& lineWidth) { 700 size_t active = 0; 701 const size_t nCand = mCandidates.size(); 702 const float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth(measured) : 0.0f; 703 704 std::vector<OptimalBreaksData> breaksData; 705 breaksData.reserve(nCand); 706 breaksData.push_back({0.0, 0, 0}); // The first candidate is always at the first line. 707 708 // "i" iterates through candidates for the end of the line. 709 for (size_t i = 1; i < nCand; i++) { 710 const bool atEnd = i == nCand - 1; 711 float best = SCORE_INFTY; 712 size_t bestPrev = 0; 713 714 size_t lineNumberLast = breaksData[active].lineNumber; 715 float width = lineWidth.getAt(lineNumberLast); 716 717 ParaWidth leftEdge = mCandidates[i].postBreak - width; 718 float bestHope = 0; 719 720 // "j" iterates through candidates for the beginning of the line. 721 for (size_t j = active; j < i; j++) { 722 const size_t lineNumber = breaksData[j].lineNumber; 723 if (lineNumber != lineNumberLast) { 724 const float widthNew = lineWidth.getAt(lineNumber); 725 if (widthNew != width) { 726 leftEdge = mCandidates[i].postBreak - width; 727 bestHope = 0; 728 width = widthNew; 729 } 730 lineNumberLast = lineNumber; 731 } 732 const float jScore = breaksData[j].score; 733 if (jScore + bestHope >= best) continue; 734 const float delta = mCandidates[j].preBreak - leftEdge; 735 736 // compute width score for line 737 738 // Note: the "bestHope" optimization makes the assumption that, when delta is 739 // non-negative, widthScore will increase monotonically as successive candidate 740 // breaks are considered. 741 float widthScore = 0.0f; 742 float additionalPenalty = 0.0f; 743 if ((atEnd || !mJustified) && delta < 0) { 744 widthScore = SCORE_OVERFULL; 745 } else if (atEnd && mStrategy != BreakStrategy::Balanced) { 746 // increase penalty for hyphen on last line 747 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty; 748 } else { 749 widthScore = delta * delta; 750 if (delta < 0) { 751 if (-delta < maxShrink * (mCandidates[i].postSpaceCount - 752 mCandidates[j].preSpaceCount)) { 753 widthScore *= SHRINK_PENALTY_MULTIPLIER; 754 } else { 755 widthScore = SCORE_OVERFULL; 756 } 757 } 758 } 759 760 if (delta < 0) { 761 active = j + 1; 762 } else { 763 bestHope = widthScore; 764 } 765 766 const float score = jScore + widthScore + additionalPenalty; 767 if (score <= best) { 768 best = score; 769 bestPrev = j; 770 } 771 } 772 breaksData.push_back({best + mCandidates[i].penalty + mLinePenalty, // score 773 bestPrev, // prev 774 breaksData[bestPrev].lineNumber + 1}); // lineNumber 775 } 776 finishBreaksOptimal(measured, breaksData); 777} 778 779LineBreakResult LineBreakerImpl::computeBreaks(const MeasuredText& measured, 780 const LineWidth& lineWidth, 781 const TabStops& tabStops) { 782 addRuns(measured, lineWidth, tabStops); 783 784 if (mStrategy == BreakStrategy::Greedy) { 785 computeBreaksGreedy(measured, lineWidth); 786 } else { 787 addAllDesperateBreaksOptimal(measured, lineWidth); 788 computeBreaksOptimal(measured, lineWidth); 789 } 790 LineBreakResult result; 791 result.breakPoints = std::move(mBreaks); 792 result.widths = std::move(mWidths); 793 result.ascents = std::move(mAscents); 794 result.descents = std::move(mDescents); 795 result.flags = std::move(mFlags); 796 return result; 797} 798 799size_t LineBreakerImpl::popBestGreedyBreak() { 800 const size_t bestBreak = mBestGreedyBreaks.front().index; 801 mBestGreedyBreaks.pop_front(); 802 return bestBreak; 803} 804 805void LineBreakerImpl::insertGreedyBreakCandidate(size_t index, float penalty) { 806 GreedyBreak gBreak = {index, penalty}; 807 if (!mBestGreedyBreaks.empty()) { 808 // Find the location in the queue where the penalty is <= the current penalty, and drop the 809 // elements from there to the end of the queue. 810 auto where = std::lower_bound(mBestGreedyBreaks.begin(), mBestGreedyBreaks.end(), gBreak, 811 [](GreedyBreak first, GreedyBreak second) -> bool { 812 return first.penalty < second.penalty; 813 }); 814 if (where != mBestGreedyBreaks.end()) { 815 mBestGreedyBreaks.erase(where, mBestGreedyBreaks.end()); 816 } 817 } 818 mBestGreedyBreaks.push_back(gBreak); 819} 820 821LineBreakResult breakIntoLines(const U16StringPiece& textBuffer, BreakStrategy strategy, 822 HyphenationFrequency frequency, bool justified, 823 const MeasuredText& measuredText, const LineWidth& lineWidth, 824 const TabStops& tabStops) { 825 LineBreakerImpl impl(textBuffer, strategy, frequency, justified); 826 return impl.computeBreaks(measuredText, lineWidth, tabStops); 827} 828 829} // namespace minikin 830