1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define VERBOSE_DEBUG 0 18 19#define LOG_TAG "Minikin" 20 21#include <limits> 22 23#include <log/log.h> 24 25#include "LayoutUtils.h" 26#include <minikin/Layout.h> 27#include <minikin/LineBreaker.h> 28 29using std::vector; 30 31namespace minikin { 32 33const int CHAR_TAB = 0x0009; 34 35// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these 36// constants are larger than any reasonable actual width score. 37const float SCORE_INFTY = std::numeric_limits<float>::max(); 38const float SCORE_OVERFULL = 1e12f; 39const float SCORE_DESPERATE = 1e10f; 40 41// Multiplier for hyphen penalty on last line. 42const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f; 43// Penalty assigned to each line break (to try to minimize number of lines) 44// TODO: when we implement full justification (so spaces can shrink and stretch), this is 45// probably not the most appropriate method. 46const float LINE_PENALTY_MULTIPLIER = 2.0f; 47 48// Penalty assigned to shrinking the whitepsace. 49const float SHRINK_PENALTY_MULTIPLIER = 4.0f; 50 51// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for 52// unreasonably long words. This is somewhat of a heuristic because extremely long words 53// are possible in some languages. This does mean that very long real words can get 54// broken by desperate breaks, with no hyphens. 55const size_t LONGEST_HYPHENATED_WORD = 45; 56 57// When the text buffer is within this limit, capacity of vectors is retained at finish(), 58// to avoid allocation. 59const size_t MAX_TEXT_BUF_RETAIN = 32678; 60 61// Maximum amount that spaces can shrink, in justified text. 62const float SHRINKABILITY = 1.0 / 3.0; 63 64void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) { 65 mWordBreaker.setLocale(locale); 66 mLocale = locale; 67 mHyphenator = hyphenator; 68} 69 70void LineBreaker::setText() { 71 mWordBreaker.setText(mTextBuf.data(), mTextBuf.size()); 72 73 // handle initial break here because addStyleRun may never be called 74 mWordBreaker.next(); 75 mCandidates.clear(); 76 Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK}; 77 mCandidates.push_back(cand); 78 79 // reset greedy breaker state 80 mBreaks.clear(); 81 mWidths.clear(); 82 mFlags.clear(); 83 mLastBreak = 0; 84 mBestBreak = 0; 85 mBestScore = SCORE_INFTY; 86 mPreBreak = 0; 87 mLastHyphenation = HyphenEdit::NO_EDIT; 88 mFirstTabIndex = INT_MAX; 89 mSpaceCount = 0; 90} 91 92void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 93 mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); 94} 95 96 97void LineBreaker::setIndents(const std::vector<float>& indents) { 98 mLineWidths.setIndents(indents); 99} 100 101// This function determines whether a character is a space that disappears at end of line. 102// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], 103// plus '\n'. 104// Note: all such characters are in the BMP, so it's ok to use code units for this. 105static bool isLineEndSpace(uint16_t c) { 106 return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || 107 c == 0x205F || c == 0x3000; 108} 109 110// Ordinarily, this method measures the text in the range given. However, when paint 111// is nullptr, it assumes the widths have already been calculated and stored in the 112// width buffer. 113// This method finds the candidate word breaks (using the ICU break iterator) and sends them 114// to addCandidate. 115float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface, 116 FontStyle style, size_t start, size_t end, bool isRtl) { 117 float width = 0.0f; 118 int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR; 119 120 float hyphenPenalty = 0.0; 121 if (paint != nullptr) { 122 width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, 123 style, *paint, typeface, mCharWidths.data() + start); 124 125 // a heuristic that seems to perform well 126 hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0); 127 if (mHyphenationFrequency == kHyphenationFrequency_Normal) { 128 hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing 129 } 130 131 if (mJustified) { 132 // Make hyphenation more aggressive for fully justified text (so that "normal" in 133 // justified mode is the same as "full" in ragged-right). 134 hyphenPenalty *= 0.25; 135 } else { 136 // Line penalty is zero for justified text. 137 mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); 138 } 139 } 140 141 size_t current = (size_t)mWordBreaker.current(); 142 size_t afterWord = start; 143 size_t lastBreak = start; 144 ParaWidth lastBreakWidth = mWidth; 145 ParaWidth postBreak = mWidth; 146 size_t postSpaceCount = mSpaceCount; 147 for (size_t i = start; i < end; i++) { 148 uint16_t c = mTextBuf[i]; 149 if (c == CHAR_TAB) { 150 mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); 151 if (mFirstTabIndex == INT_MAX) { 152 mFirstTabIndex = (int)i; 153 } 154 // fall back to greedy; other modes don't know how to deal with tabs 155 mStrategy = kBreakStrategy_Greedy; 156 } else { 157 if (isWordSpace(c)) mSpaceCount += 1; 158 mWidth += mCharWidths[i]; 159 if (!isLineEndSpace(c)) { 160 postBreak = mWidth; 161 postSpaceCount = mSpaceCount; 162 afterWord = i + 1; 163 } 164 } 165 if (i + 1 == current) { 166 size_t wordStart = mWordBreaker.wordStart(); 167 size_t wordEnd = mWordBreaker.wordEnd(); 168 if (paint != nullptr && mHyphenator != nullptr && 169 mHyphenationFrequency != kHyphenationFrequency_None && 170 wordStart >= start && wordEnd > wordStart && 171 wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) { 172 mHyphenator->hyphenate(&mHyphBuf, 173 &mTextBuf[wordStart], 174 wordEnd - wordStart, 175 mLocale); 176#if VERBOSE_DEBUG 177 std::string hyphenatedString; 178 for (size_t j = wordStart; j < wordEnd; j++) { 179 if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) { 180 hyphenatedString.push_back('-'); 181 } 182 // Note: only works with ASCII, should do UTF-8 conversion here 183 hyphenatedString.push_back(buffer()[j]); 184 } 185 ALOGD("hyphenated string: %s", hyphenatedString.c_str()); 186#endif 187 188 // measure hyphenated substrings 189 for (size_t j = wordStart; j < wordEnd; j++) { 190 HyphenationType hyph = mHyphBuf[j - wordStart]; 191 if (hyph != HyphenationType::DONT_BREAK) { 192 paint->hyphenEdit = HyphenEdit::editForThisLine(hyph); 193 const float firstPartWidth = Layout::measureText(mTextBuf.data(), 194 lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style, 195 *paint, typeface, nullptr); 196 ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth; 197 198 paint->hyphenEdit = HyphenEdit::editForNextLine(hyph); 199 const float secondPartWidth = Layout::measureText(mTextBuf.data(), j, 200 afterWord - j, mTextBuf.size(), bidiFlags, style, *paint, 201 typeface, nullptr); 202 ParaWidth hyphPreBreak = postBreak - secondPartWidth; 203 204 addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount, 205 hyphenPenalty, hyph); 206 207 paint->hyphenEdit = HyphenEdit::NO_EDIT; 208 } 209 } 210 } 211 212 // Skip break for zero-width characters inside replacement span 213 if (paint != nullptr || current == end || mCharWidths[current] > 0) { 214 float penalty = hyphenPenalty * mWordBreaker.breakBadness(); 215 addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty, 216 HyphenationType::DONT_BREAK); 217 } 218 lastBreak = current; 219 lastBreakWidth = mWidth; 220 current = (size_t)mWordBreaker.next(); 221 } 222 } 223 224 return width; 225} 226 227// add a word break (possibly for a hyphenated fragment), and add desperate breaks if 228// needed (ie when word exceeds current line width) 229void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, 230 size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) { 231 Candidate cand; 232 ParaWidth width = mCandidates.back().preBreak; 233 if (postBreak - width > currentLineWidth()) { 234 // Add desperate breaks. 235 // Note: these breaks are based on the shaping of the (non-broken) original text; they 236 // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping. 237 size_t i = mCandidates.back().offset; 238 width += mCharWidths[i++]; 239 for (; i < offset; i++) { 240 float w = mCharWidths[i]; 241 if (w > 0) { 242 cand.offset = i; 243 cand.preBreak = width; 244 cand.postBreak = width; 245 // postSpaceCount doesn't include trailing spaces 246 cand.preSpaceCount = postSpaceCount; 247 cand.postSpaceCount = postSpaceCount; 248 cand.penalty = SCORE_DESPERATE; 249 cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; 250#if VERBOSE_DEBUG 251 ALOGD("desperate cand: %zd %g:%g", 252 mCandidates.size(), cand.postBreak, cand.preBreak); 253#endif 254 addCandidate(cand); 255 width += w; 256 } 257 } 258 } 259 260 cand.offset = offset; 261 cand.preBreak = preBreak; 262 cand.postBreak = postBreak; 263 cand.penalty = penalty; 264 cand.preSpaceCount = preSpaceCount; 265 cand.postSpaceCount = postSpaceCount; 266 cand.hyphenType = hyph; 267#if VERBOSE_DEBUG 268 ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); 269#endif 270 addCandidate(cand); 271} 272 273// Helper method for addCandidate() 274void LineBreaker::pushGreedyBreak() { 275 const Candidate& bestCandidate = mCandidates[mBestBreak]; 276 pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak, 277 mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType)); 278 mBestScore = SCORE_INFTY; 279#if VERBOSE_DEBUG 280 ALOGD("break: %d %g", mBreaks.back(), mWidths.back()); 281#endif 282 mLastBreak = mBestBreak; 283 mPreBreak = bestCandidate.preBreak; 284 mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType); 285} 286 287// TODO performance: could avoid populating mCandidates if greedy only 288void LineBreaker::addCandidate(Candidate cand) { 289 const size_t candIndex = mCandidates.size(); 290 mCandidates.push_back(cand); 291 292 // mLastBreak is the index of the last line break we decided to do in mCandidates, 293 // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate 294 // we have found since then, and mBestScore is its penalty. 295 if (cand.postBreak - mPreBreak > currentLineWidth()) { 296 // This break would create an overfull line, pick the best break and break there (greedy) 297 if (mBestBreak == mLastBreak) { 298 // No good break has been found since last break. Break here. 299 mBestBreak = candIndex; 300 } 301 pushGreedyBreak(); 302 } 303 304 while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) { 305 // We should rarely come here. But if we are here, we have broken the line, but the 306 // remaining part still doesn't fit. We now need to break at the second best place after the 307 // last break, but we have not kept that information, so we need to go back and find it. 308 // 309 // In some really rare cases, postBreak - preBreak of a candidate itself may be over the 310 // current line width. We protect ourselves against an infinite loop in that case by 311 // checking that we have not broken the line at this candidate already. 312 for (size_t i = mLastBreak + 1; i < candIndex; i++) { 313 const float penalty = mCandidates[i].penalty; 314 if (penalty <= mBestScore) { 315 mBestBreak = i; 316 mBestScore = penalty; 317 } 318 } 319 if (mBestBreak == mLastBreak) { 320 // We didn't find anything good. Break here. 321 mBestBreak = candIndex; 322 } 323 pushGreedyBreak(); 324 } 325 326 if (cand.penalty <= mBestScore) { 327 mBestBreak = candIndex; 328 mBestScore = cand.penalty; 329 } 330} 331 332void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) { 333 mBreaks.push_back(offset); 334 mWidths.push_back(width); 335 int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift; 336 flags |= hyphenEdit; 337 mFlags.push_back(flags); 338 mFirstTabIndex = INT_MAX; 339} 340 341void LineBreaker::addReplacement(size_t start, size_t end, float width) { 342 mCharWidths[start] = width; 343 std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); 344 addStyleRun(nullptr, nullptr, FontStyle(), start, end, false); 345} 346 347// Get the width of a space. May return 0 if there are no spaces. 348// Note: if there are multiple different widths for spaces (for example, because of mixing of 349// fonts), it's only guaranteed to pick one. 350float LineBreaker::getSpaceWidth() const { 351 for (size_t i = 0; i < mTextBuf.size(); i++) { 352 if (isWordSpace(mTextBuf[i])) { 353 return mCharWidths[i]; 354 } 355 } 356 return 0.0f; 357} 358 359float LineBreaker::currentLineWidth() const { 360 return mLineWidths.getLineWidth(mBreaks.size()); 361} 362 363void LineBreaker::computeBreaksGreedy() { 364 // All breaks but the last have been added in addCandidate already. 365 size_t nCand = mCandidates.size(); 366 if (nCand == 1 || mLastBreak != nCand - 1) { 367 pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 368 mLastHyphenation); 369 // don't need to update mBestScore, because we're done 370#if VERBOSE_DEBUG 371 ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); 372#endif 373 } 374} 375 376// Follow "prev" links in mCandidates array, and copy to result arrays. 377void LineBreaker::finishBreaksOptimal() { 378 // clear existing greedy break result 379 mBreaks.clear(); 380 mWidths.clear(); 381 mFlags.clear(); 382 size_t nCand = mCandidates.size(); 383 size_t prev; 384 for (size_t i = nCand - 1; i > 0; i = prev) { 385 prev = mCandidates[i].prev; 386 mBreaks.push_back(mCandidates[i].offset); 387 mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); 388 int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType); 389 if (prev > 0) { 390 flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType); 391 } 392 mFlags.push_back(flags); 393 } 394 std::reverse(mBreaks.begin(), mBreaks.end()); 395 std::reverse(mWidths.begin(), mWidths.end()); 396 std::reverse(mFlags.begin(), mFlags.end()); 397} 398 399void LineBreaker::computeBreaksOptimal(bool isRectangle) { 400 size_t active = 0; 401 size_t nCand = mCandidates.size(); 402 float width = mLineWidths.getLineWidth(0); 403 float shortLineFactor = mJustified ? 0.75f : 0.5f; 404 float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f; 405 406 // "i" iterates through candidates for the end of the line. 407 for (size_t i = 1; i < nCand; i++) { 408 bool atEnd = i == nCand - 1; 409 float best = SCORE_INFTY; 410 size_t bestPrev = 0; 411 size_t lineNumberLast = 0; 412 413 if (!isRectangle) { 414 size_t lineNumberLast = mCandidates[active].lineNumber; 415 width = mLineWidths.getLineWidth(lineNumberLast); 416 } 417 ParaWidth leftEdge = mCandidates[i].postBreak - width; 418 float bestHope = 0; 419 420 // "j" iterates through candidates for the beginning of the line. 421 for (size_t j = active; j < i; j++) { 422 if (!isRectangle) { 423 size_t lineNumber = mCandidates[j].lineNumber; 424 if (lineNumber != lineNumberLast) { 425 float widthNew = mLineWidths.getLineWidth(lineNumber); 426 if (widthNew != width) { 427 leftEdge = mCandidates[i].postBreak - width; 428 bestHope = 0; 429 width = widthNew; 430 } 431 lineNumberLast = lineNumber; 432 } 433 } 434 float jScore = mCandidates[j].score; 435 if (jScore + bestHope >= best) continue; 436 float delta = mCandidates[j].preBreak - leftEdge; 437 438 // compute width score for line 439 440 // Note: the "bestHope" optimization makes the assumption that, when delta is 441 // non-negative, widthScore will increase monotonically as successive candidate 442 // breaks are considered. 443 float widthScore = 0.0f; 444 float additionalPenalty = 0.0f; 445 if ((atEnd || !mJustified) && delta < 0) { 446 widthScore = SCORE_OVERFULL; 447 } else if (atEnd && mStrategy != kBreakStrategy_Balanced) { 448 // increase penalty for hyphen on last line 449 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty; 450 // Penalize very short (< 1 - shortLineFactor of total width) lines. 451 float underfill = delta - shortLineFactor * width; 452 widthScore = underfill > 0 ? underfill * underfill : 0; 453 } else { 454 widthScore = delta * delta; 455 if (delta < 0) { 456 if (-delta < maxShrink * 457 (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) { 458 widthScore *= SHRINK_PENALTY_MULTIPLIER; 459 } else { 460 widthScore = SCORE_OVERFULL; 461 } 462 } 463 } 464 465 if (delta < 0) { 466 active = j + 1; 467 } else { 468 bestHope = widthScore; 469 } 470 471 float score = jScore + widthScore + additionalPenalty; 472 if (score <= best) { 473 best = score; 474 bestPrev = j; 475 } 476 } 477 mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty; 478 mCandidates[i].prev = bestPrev; 479 mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1; 480#if VERBOSE_DEBUG 481 ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev); 482#endif 483 } 484 finishBreaksOptimal(); 485} 486 487size_t LineBreaker::computeBreaks() { 488 if (mStrategy == kBreakStrategy_Greedy) { 489 computeBreaksGreedy(); 490 } else { 491 computeBreaksOptimal(mLineWidths.isConstant()); 492 } 493 return mBreaks.size(); 494} 495 496void LineBreaker::finish() { 497 mWordBreaker.finish(); 498 mWidth = 0; 499 mLineWidths.clear(); 500 mCandidates.clear(); 501 mBreaks.clear(); 502 mWidths.clear(); 503 mFlags.clear(); 504 if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { 505 mTextBuf.clear(); 506 mTextBuf.shrink_to_fit(); 507 mCharWidths.clear(); 508 mCharWidths.shrink_to_fit(); 509 mHyphBuf.clear(); 510 mHyphBuf.shrink_to_fit(); 511 mCandidates.shrink_to_fit(); 512 mBreaks.shrink_to_fit(); 513 mWidths.shrink_to_fit(); 514 mFlags.shrink_to_fit(); 515 } 516 mStrategy = kBreakStrategy_Greedy; 517 mHyphenationFrequency = kHyphenationFrequency_Normal; 518 mLinePenalty = 0.0f; 519 mJustified = false; 520} 521 522} // namespace minikin 523