LineBreaker.cpp revision 728f42cbd5acd0fa8d8dbe07d0302d41475eb95c
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define VERBOSE_DEBUG 0 18 19#define LOG_TAG "Minikin" 20 21#include <limits> 22 23#include <log/log.h> 24 25#include "LayoutUtils.h" 26#include <minikin/Layout.h> 27#include <minikin/LineBreaker.h> 28 29using std::vector; 30 31namespace minikin { 32 33const int CHAR_TAB = 0x0009; 34 35// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these 36// constants are larger than any reasonable actual width score. 37const float SCORE_INFTY = std::numeric_limits<float>::max(); 38const float SCORE_OVERFULL = 1e12f; 39const float SCORE_DESPERATE = 1e10f; 40 41// Multiplier for hyphen penalty on last line. 42const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f; 43// Penalty assigned to each line break (to try to minimize number of lines) 44// TODO: when we implement full justification (so spaces can shrink and stretch), this is 45// probably not the most appropriate method. 46const float LINE_PENALTY_MULTIPLIER = 2.0f; 47 48// Penalty assigned to shrinking the whitepsace. 49const float SHRINK_PENALTY_MULTIPLIER = 4.0f; 50 51// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for 52// unreasonably long words. This is somewhat of a heuristic because extremely long words 53// are possible in some languages. This does mean that very long real words can get 54// broken by desperate breaks, with no hyphens. 55const size_t LONGEST_HYPHENATED_WORD = 45; 56 57// When the text buffer is within this limit, capacity of vectors is retained at finish(), 58// to avoid allocation. 59const size_t MAX_TEXT_BUF_RETAIN = 32678; 60 61// Maximum amount that spaces can shrink, in justified text. 62const float SHRINKABILITY = 1.0 / 3.0; 63 64void LineBreaker::setLocales(const char* locales, const std::vector<Hyphenator*>& hyphenators) { 65 bool goodLocaleFound = false; 66 const ssize_t numLocales = hyphenators.size(); 67 // For now, we ignore all locales except the first valid one. 68 // TODO: Support selecting the locale based on the script of the text. 69 const char* localeStart = locales; 70 for (ssize_t i = 0; i < numLocales - 1; i++) { // Loop over all locales, except the last one. 71 const char* localeEnd = strchr(localeStart, ','); 72 const size_t localeNameLength = localeEnd - localeStart; 73 char localeName[localeNameLength + 1]; 74 strncpy(localeName, localeStart, localeNameLength); 75 localeName[localeNameLength] = '\0'; 76 mLocale = icu::Locale::createFromName(localeName); 77 goodLocaleFound = !mLocale.isBogus(); 78 if (goodLocaleFound) { 79 mHyphenator = hyphenators[i]; 80 break; 81 } else { 82 localeStart = localeEnd + 1; 83 } 84 } 85 if (!goodLocaleFound) { // Try the last locale. 86 mLocale = icu::Locale::createFromName(localeStart); 87 if (mLocale.isBogus()) { 88 // No good locale. 89 mLocale = icu::Locale::getRoot(); 90 mHyphenator = nullptr; 91 } else { 92 mHyphenator = numLocales == 0 ? nullptr : hyphenators[numLocales - 1]; 93 } 94 } 95 mWordBreaker.setLocale(mLocale); 96} 97 98void LineBreaker::setText() { 99 mWordBreaker.setText(mTextBuf.data(), mTextBuf.size()); 100 101 // handle initial break here because addStyleRun may never be called 102 mWordBreaker.next(); 103 mCandidates.clear(); 104 Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK}; 105 mCandidates.push_back(cand); 106 107 // reset greedy breaker state 108 mBreaks.clear(); 109 mWidths.clear(); 110 mFlags.clear(); 111 mLastBreak = 0; 112 mBestBreak = 0; 113 mBestScore = SCORE_INFTY; 114 mPreBreak = 0; 115 mLastHyphenation = HyphenEdit::NO_EDIT; 116 mFirstTabIndex = INT_MAX; 117 mSpaceCount = 0; 118} 119 120void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 121 mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); 122} 123 124 125void LineBreaker::setIndents(const std::vector<float>& indents) { 126 mLineWidths.setIndents(indents); 127} 128 129// This function determines whether a character is a space that disappears at end of line. 130// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], 131// plus '\n'. 132// Note: all such characters are in the BMP, so it's ok to use code units for this. 133static bool isLineEndSpace(uint16_t c) { 134 return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || 135 c == 0x205F || c == 0x3000; 136} 137 138// Ordinarily, this method measures the text in the range given. However, when paint 139// is nullptr, it assumes the widths have already been calculated and stored in the 140// width buffer. 141// This method finds the candidate word breaks (using the ICU break iterator) and sends them 142// to addCandidate. 143float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface, 144 FontStyle style, size_t start, size_t end, bool isRtl) { 145 float width = 0.0f; 146 int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR; 147 148 float hyphenPenalty = 0.0; 149 if (paint != nullptr) { 150 width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, 151 style, *paint, typeface, mCharWidths.data() + start); 152 153 // a heuristic that seems to perform well 154 hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0); 155 if (mHyphenationFrequency == kHyphenationFrequency_Normal) { 156 hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing 157 } 158 159 if (mJustified) { 160 // Make hyphenation more aggressive for fully justified text (so that "normal" in 161 // justified mode is the same as "full" in ragged-right). 162 hyphenPenalty *= 0.25; 163 } else { 164 // Line penalty is zero for justified text. 165 mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); 166 } 167 } 168 169 size_t current = (size_t)mWordBreaker.current(); 170 size_t afterWord = start; 171 size_t lastBreak = start; 172 ParaWidth lastBreakWidth = mWidth; 173 ParaWidth postBreak = mWidth; 174 size_t postSpaceCount = mSpaceCount; 175 for (size_t i = start; i < end; i++) { 176 uint16_t c = mTextBuf[i]; 177 if (c == CHAR_TAB) { 178 mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); 179 if (mFirstTabIndex == INT_MAX) { 180 mFirstTabIndex = (int)i; 181 } 182 // fall back to greedy; other modes don't know how to deal with tabs 183 mStrategy = kBreakStrategy_Greedy; 184 } else { 185 if (isWordSpace(c)) mSpaceCount += 1; 186 mWidth += mCharWidths[i]; 187 if (!isLineEndSpace(c)) { 188 postBreak = mWidth; 189 postSpaceCount = mSpaceCount; 190 afterWord = i + 1; 191 } 192 } 193 if (i + 1 == current) { 194 size_t wordStart = mWordBreaker.wordStart(); 195 size_t wordEnd = mWordBreaker.wordEnd(); 196 if (paint != nullptr && mHyphenator != nullptr && 197 mHyphenationFrequency != kHyphenationFrequency_None && 198 wordStart >= start && wordEnd > wordStart && 199 wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) { 200 mHyphenator->hyphenate(&mHyphBuf, 201 &mTextBuf[wordStart], 202 wordEnd - wordStart, 203 mLocale); 204#if VERBOSE_DEBUG 205 std::string hyphenatedString; 206 for (size_t j = wordStart; j < wordEnd; j++) { 207 if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) { 208 hyphenatedString.push_back('-'); 209 } 210 // Note: only works with ASCII, should do UTF-8 conversion here 211 hyphenatedString.push_back(buffer()[j]); 212 } 213 ALOGD("hyphenated string: %s", hyphenatedString.c_str()); 214#endif 215 216 // measure hyphenated substrings 217 for (size_t j = wordStart; j < wordEnd; j++) { 218 HyphenationType hyph = mHyphBuf[j - wordStart]; 219 if (hyph != HyphenationType::DONT_BREAK) { 220 paint->hyphenEdit = HyphenEdit::editForThisLine(hyph); 221 const float firstPartWidth = Layout::measureText(mTextBuf.data(), 222 lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style, 223 *paint, typeface, nullptr); 224 ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth; 225 226 paint->hyphenEdit = HyphenEdit::editForNextLine(hyph); 227 const float secondPartWidth = Layout::measureText(mTextBuf.data(), j, 228 afterWord - j, mTextBuf.size(), bidiFlags, style, *paint, 229 typeface, nullptr); 230 ParaWidth hyphPreBreak = postBreak - secondPartWidth; 231 232 addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount, 233 hyphenPenalty, hyph); 234 235 paint->hyphenEdit = HyphenEdit::NO_EDIT; 236 } 237 } 238 } 239 240 // Skip break for zero-width characters inside replacement span 241 if (paint != nullptr || current == end || mCharWidths[current] > 0) { 242 float penalty = hyphenPenalty * mWordBreaker.breakBadness(); 243 addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty, 244 HyphenationType::DONT_BREAK); 245 } 246 lastBreak = current; 247 lastBreakWidth = mWidth; 248 current = (size_t)mWordBreaker.next(); 249 } 250 } 251 252 return width; 253} 254 255// add a word break (possibly for a hyphenated fragment), and add desperate breaks if 256// needed (ie when word exceeds current line width) 257void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, 258 size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) { 259 Candidate cand; 260 ParaWidth width = mCandidates.back().preBreak; 261 if (postBreak - width > currentLineWidth()) { 262 // Add desperate breaks. 263 // Note: these breaks are based on the shaping of the (non-broken) original text; they 264 // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping. 265 size_t i = mCandidates.back().offset; 266 width += mCharWidths[i++]; 267 for (; i < offset; i++) { 268 float w = mCharWidths[i]; 269 if (w > 0) { 270 cand.offset = i; 271 cand.preBreak = width; 272 cand.postBreak = width; 273 // postSpaceCount doesn't include trailing spaces 274 cand.preSpaceCount = postSpaceCount; 275 cand.postSpaceCount = postSpaceCount; 276 cand.penalty = SCORE_DESPERATE; 277 cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; 278#if VERBOSE_DEBUG 279 ALOGD("desperate cand: %zd %g:%g", 280 mCandidates.size(), cand.postBreak, cand.preBreak); 281#endif 282 addCandidate(cand); 283 width += w; 284 } 285 } 286 } 287 288 cand.offset = offset; 289 cand.preBreak = preBreak; 290 cand.postBreak = postBreak; 291 cand.penalty = penalty; 292 cand.preSpaceCount = preSpaceCount; 293 cand.postSpaceCount = postSpaceCount; 294 cand.hyphenType = hyph; 295#if VERBOSE_DEBUG 296 ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); 297#endif 298 addCandidate(cand); 299} 300 301// Helper method for addCandidate() 302void LineBreaker::pushGreedyBreak() { 303 const Candidate& bestCandidate = mCandidates[mBestBreak]; 304 pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak, 305 mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType)); 306 mBestScore = SCORE_INFTY; 307#if VERBOSE_DEBUG 308 ALOGD("break: %d %g", mBreaks.back(), mWidths.back()); 309#endif 310 mLastBreak = mBestBreak; 311 mPreBreak = bestCandidate.preBreak; 312 mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType); 313} 314 315// TODO performance: could avoid populating mCandidates if greedy only 316void LineBreaker::addCandidate(Candidate cand) { 317 const size_t candIndex = mCandidates.size(); 318 mCandidates.push_back(cand); 319 320 // mLastBreak is the index of the last line break we decided to do in mCandidates, 321 // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate 322 // we have found since then, and mBestScore is its penalty. 323 if (cand.postBreak - mPreBreak > currentLineWidth()) { 324 // This break would create an overfull line, pick the best break and break there (greedy) 325 if (mBestBreak == mLastBreak) { 326 // No good break has been found since last break. Break here. 327 mBestBreak = candIndex; 328 } 329 pushGreedyBreak(); 330 } 331 332 while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) { 333 // We should rarely come here. But if we are here, we have broken the line, but the 334 // remaining part still doesn't fit. We now need to break at the second best place after the 335 // last break, but we have not kept that information, so we need to go back and find it. 336 // 337 // In some really rare cases, postBreak - preBreak of a candidate itself may be over the 338 // current line width. We protect ourselves against an infinite loop in that case by 339 // checking that we have not broken the line at this candidate already. 340 for (size_t i = mLastBreak + 1; i < candIndex; i++) { 341 const float penalty = mCandidates[i].penalty; 342 if (penalty <= mBestScore) { 343 mBestBreak = i; 344 mBestScore = penalty; 345 } 346 } 347 if (mBestBreak == mLastBreak) { 348 // We didn't find anything good. Break here. 349 mBestBreak = candIndex; 350 } 351 pushGreedyBreak(); 352 } 353 354 if (cand.penalty <= mBestScore) { 355 mBestBreak = candIndex; 356 mBestScore = cand.penalty; 357 } 358} 359 360void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) { 361 mBreaks.push_back(offset); 362 mWidths.push_back(width); 363 int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift; 364 flags |= hyphenEdit; 365 mFlags.push_back(flags); 366 mFirstTabIndex = INT_MAX; 367} 368 369void LineBreaker::addReplacement(size_t start, size_t end, float width) { 370 mCharWidths[start] = width; 371 std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); 372 addStyleRun(nullptr, nullptr, FontStyle(), start, end, false); 373} 374 375// Get the width of a space. May return 0 if there are no spaces. 376// Note: if there are multiple different widths for spaces (for example, because of mixing of 377// fonts), it's only guaranteed to pick one. 378float LineBreaker::getSpaceWidth() const { 379 for (size_t i = 0; i < mTextBuf.size(); i++) { 380 if (isWordSpace(mTextBuf[i])) { 381 return mCharWidths[i]; 382 } 383 } 384 return 0.0f; 385} 386 387float LineBreaker::currentLineWidth() const { 388 return mLineWidths.getLineWidth(mBreaks.size()); 389} 390 391void LineBreaker::computeBreaksGreedy() { 392 // All breaks but the last have been added in addCandidate already. 393 size_t nCand = mCandidates.size(); 394 if (nCand == 1 || mLastBreak != nCand - 1) { 395 pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 396 mLastHyphenation); 397 // don't need to update mBestScore, because we're done 398#if VERBOSE_DEBUG 399 ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); 400#endif 401 } 402} 403 404// Follow "prev" links in mCandidates array, and copy to result arrays. 405void LineBreaker::finishBreaksOptimal() { 406 // clear existing greedy break result 407 mBreaks.clear(); 408 mWidths.clear(); 409 mFlags.clear(); 410 size_t nCand = mCandidates.size(); 411 size_t prev; 412 for (size_t i = nCand - 1; i > 0; i = prev) { 413 prev = mCandidates[i].prev; 414 mBreaks.push_back(mCandidates[i].offset); 415 mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); 416 int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType); 417 if (prev > 0) { 418 flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType); 419 } 420 mFlags.push_back(flags); 421 } 422 std::reverse(mBreaks.begin(), mBreaks.end()); 423 std::reverse(mWidths.begin(), mWidths.end()); 424 std::reverse(mFlags.begin(), mFlags.end()); 425} 426 427void LineBreaker::computeBreaksOptimal(bool isRectangle) { 428 size_t active = 0; 429 size_t nCand = mCandidates.size(); 430 float width = mLineWidths.getLineWidth(0); 431 float shortLineFactor = mJustified ? 0.75f : 0.5f; 432 float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f; 433 434 // "i" iterates through candidates for the end of the line. 435 for (size_t i = 1; i < nCand; i++) { 436 bool atEnd = i == nCand - 1; 437 float best = SCORE_INFTY; 438 size_t bestPrev = 0; 439 size_t lineNumberLast = 0; 440 441 if (!isRectangle) { 442 size_t lineNumberLast = mCandidates[active].lineNumber; 443 width = mLineWidths.getLineWidth(lineNumberLast); 444 } 445 ParaWidth leftEdge = mCandidates[i].postBreak - width; 446 float bestHope = 0; 447 448 // "j" iterates through candidates for the beginning of the line. 449 for (size_t j = active; j < i; j++) { 450 if (!isRectangle) { 451 size_t lineNumber = mCandidates[j].lineNumber; 452 if (lineNumber != lineNumberLast) { 453 float widthNew = mLineWidths.getLineWidth(lineNumber); 454 if (widthNew != width) { 455 leftEdge = mCandidates[i].postBreak - width; 456 bestHope = 0; 457 width = widthNew; 458 } 459 lineNumberLast = lineNumber; 460 } 461 } 462 float jScore = mCandidates[j].score; 463 if (jScore + bestHope >= best) continue; 464 float delta = mCandidates[j].preBreak - leftEdge; 465 466 // compute width score for line 467 468 // Note: the "bestHope" optimization makes the assumption that, when delta is 469 // non-negative, widthScore will increase monotonically as successive candidate 470 // breaks are considered. 471 float widthScore = 0.0f; 472 float additionalPenalty = 0.0f; 473 if ((atEnd || !mJustified) && delta < 0) { 474 widthScore = SCORE_OVERFULL; 475 } else if (atEnd && mStrategy != kBreakStrategy_Balanced) { 476 // increase penalty for hyphen on last line 477 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty; 478 // Penalize very short (< 1 - shortLineFactor of total width) lines. 479 float underfill = delta - shortLineFactor * width; 480 widthScore = underfill > 0 ? underfill * underfill : 0; 481 } else { 482 widthScore = delta * delta; 483 if (delta < 0) { 484 if (-delta < maxShrink * 485 (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) { 486 widthScore *= SHRINK_PENALTY_MULTIPLIER; 487 } else { 488 widthScore = SCORE_OVERFULL; 489 } 490 } 491 } 492 493 if (delta < 0) { 494 active = j + 1; 495 } else { 496 bestHope = widthScore; 497 } 498 499 float score = jScore + widthScore + additionalPenalty; 500 if (score <= best) { 501 best = score; 502 bestPrev = j; 503 } 504 } 505 mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty; 506 mCandidates[i].prev = bestPrev; 507 mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1; 508#if VERBOSE_DEBUG 509 ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev); 510#endif 511 } 512 finishBreaksOptimal(); 513} 514 515size_t LineBreaker::computeBreaks() { 516 if (mStrategy == kBreakStrategy_Greedy) { 517 computeBreaksGreedy(); 518 } else { 519 computeBreaksOptimal(mLineWidths.isConstant()); 520 } 521 return mBreaks.size(); 522} 523 524void LineBreaker::finish() { 525 mWordBreaker.finish(); 526 mWidth = 0; 527 mLineWidths.clear(); 528 mCandidates.clear(); 529 mBreaks.clear(); 530 mWidths.clear(); 531 mFlags.clear(); 532 if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { 533 mTextBuf.clear(); 534 mTextBuf.shrink_to_fit(); 535 mCharWidths.clear(); 536 mCharWidths.shrink_to_fit(); 537 mHyphBuf.clear(); 538 mHyphBuf.shrink_to_fit(); 539 mCandidates.shrink_to_fit(); 540 mBreaks.shrink_to_fit(); 541 mWidths.shrink_to_fit(); 542 mFlags.shrink_to_fit(); 543 } 544 mStrategy = kBreakStrategy_Greedy; 545 mHyphenationFrequency = kHyphenationFrequency_Normal; 546 mLinePenalty = 0.0f; 547 mJustified = false; 548} 549 550} // namespace minikin 551