LineBreaker.cpp revision 0dc07c0be325b7c12c50729e04c4b2785a673fd7
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define VERBOSE_DEBUG 0 18 19#include <limits> 20 21#define LOG_TAG "Minikin" 22#include <cutils/log.h> 23 24#include <minikin/Layout.h> 25#include <minikin/LineBreaker.h> 26 27using std::vector; 28 29namespace android { 30 31const int CHAR_TAB = 0x0009; 32const uint16_t CHAR_SOFT_HYPHEN = 0x00AD; 33 34// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these 35// constants are larger than any reasonable actual width score. 36const float SCORE_INFTY = std::numeric_limits<float>::max(); 37const float SCORE_OVERFULL = 1e12f; 38const float SCORE_DESPERATE = 1e10f; 39 40// When the text buffer is within this limit, capacity of vectors is retained at finish(), 41// to avoid allocation. 42const size_t MAX_TEXT_BUF_RETAIN = 32678; 43 44void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) { 45 delete mBreakIterator; 46 UErrorCode status = U_ZERO_ERROR; 47 mBreakIterator = icu::BreakIterator::createLineInstance(locale, status); 48 // TODO: check status 49 50 // TODO: load actual resource dependent on locale; letting Minikin do it is a hack 51 mHyphenator = hyphenator; 52} 53 54void LineBreaker::setText() { 55 UErrorCode status = U_ZERO_ERROR; 56 utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status); 57 mBreakIterator->setText(&mUText, status); 58 mBreakIterator->first(); 59 60 // handle initial break here because addStyleRun may never be called 61 mBreakIterator->next(); 62 mCandidates.clear(); 63 Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0}; 64 mCandidates.push_back(cand); 65 66 // reset greedy breaker state 67 mBreaks.clear(); 68 mWidths.clear(); 69 mFlags.clear(); 70 mLastBreak = 0; 71 mBestBreak = 0; 72 mBestScore = SCORE_INFTY; 73 mPreBreak = 0; 74 mFirstTabIndex = INT_MAX; 75} 76 77void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 78 mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); 79} 80 81 82void LineBreaker::setIndents(const std::vector<float>& indents) { 83 mLineWidths.setIndents(indents); 84} 85 86// This function determines whether a character is a space that disappears at end of line. 87// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], 88// plus '\n'. 89// Note: all such characters are in the BMP, so it's ok to use code units for this. 90static bool isLineEndSpace(uint16_t c) { 91 return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || 92 c == 0x205F || c == 0x3000; 93} 94 95// Ordinarily, this method measures the text in the range given. However, when paint 96// is nullptr, it assumes the widths have already been calculated and stored in the 97// width buffer. 98// This method finds the candidate word breaks (using the ICU break iterator) and sends them 99// to addCandidate. 100float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface, 101 FontStyle style, size_t start, size_t end, bool isRtl) { 102 Layout layout; // performance TODO: move layout to self object to reduce allocation cost? 103 float width = 0.0f; 104 int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR; 105 106 float hyphenPenalty = 0.0; 107 if (paint != nullptr) { 108 layout.setFontCollection(typeface); 109 layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style, 110 *paint); 111 layout.getAdvances(mCharWidths.data() + start); 112 width = layout.getAdvance(); 113 114 // a heuristic that seems to perform well 115 hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0); 116 if (mHyphenationFrequency == kHyphenationFrequency_Normal) { 117 hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing 118 } 119 } 120 121 size_t current = (size_t)mBreakIterator->current(); 122 size_t wordEnd = start; 123 size_t lastBreak = start; 124 ParaWidth lastBreakWidth = mWidth; 125 ParaWidth postBreak = mWidth; 126 for (size_t i = start; i < end; i++) { 127 uint16_t c = mTextBuf[i]; 128 if (c == CHAR_TAB) { 129 mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); 130 if (mFirstTabIndex == INT_MAX) { 131 mFirstTabIndex = (int)i; 132 } 133 // fall back to greedy; other modes don't know how to deal with tabs 134 mStrategy = kBreakStrategy_Greedy; 135 } else { 136 mWidth += mCharWidths[i]; 137 if (!isLineEndSpace(c)) { 138 postBreak = mWidth; 139 wordEnd = i + 1; 140 } 141 } 142 if (i + 1 == current) { 143 // Override ICU's treatment of soft hyphen as a break opportunity, because we want it 144 // to be a hyphen break, with penalty and drawing behavior. 145 if (c != CHAR_SOFT_HYPHEN) { 146 if (paint != nullptr && mHyphenator != nullptr && 147 mHyphenationFrequency != kHyphenationFrequency_None && 148 wordEnd > lastBreak) { 149 mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak); 150 #if VERBOSE_DEBUG 151 std::string hyphenatedString; 152 for (size_t j = lastBreak; j < wordEnd; j++) { 153 if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-'); 154 // Note: only works with ASCII, should do UTF-8 conversion here 155 hyphenatedString.push_back(buffer()[j]); 156 } 157 ALOGD("hyphenated string: %s", hyphenatedString.c_str()); 158 #endif 159 160 // measure hyphenated substrings 161 for (size_t j = lastBreak; j < wordEnd; j++) { 162 uint8_t hyph = mHyphBuf[j - lastBreak]; 163 if (hyph) { 164 paint->hyphenEdit = hyph; 165 layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak, 166 mTextBuf.size(), bidiFlags, style, *paint); 167 ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance(); 168 paint->hyphenEdit = 0; 169 layout.doLayout(mTextBuf.data(), j, wordEnd - j, 170 mTextBuf.size(), bidiFlags, style, *paint); 171 ParaWidth hyphPreBreak = postBreak - layout.getAdvance(); 172 addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph); 173 } 174 } 175 } 176 177 // Skip break for zero-width characters. 178 if (current == mTextBuf.size() || mCharWidths[current] > 0) { 179 addWordBreak(current, mWidth, postBreak, 0.0, 0); 180 } 181 lastBreak = current; 182 lastBreakWidth = mWidth; 183 } 184 current = (size_t)mBreakIterator->next(); 185 } 186 } 187 188 return width; 189} 190 191// add a word break (possibly for a hyphenated fragment), and add desperate breaks if 192// needed (ie when word exceeds current line width) 193void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, 194 float penalty, uint8_t hyph) { 195 Candidate cand; 196 ParaWidth width = mCandidates.back().preBreak; 197 if (postBreak - width > currentLineWidth()) { 198 // Add desperate breaks. 199 // Note: these breaks are based on the shaping of the (non-broken) original text; they 200 // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping. 201 size_t i = mCandidates.back().offset; 202 width += mCharWidths[i++]; 203 for (; i < offset; i++) { 204 float w = mCharWidths[i]; 205 if (w > 0) { 206 cand.offset = i; 207 cand.preBreak = width; 208 cand.postBreak = width; 209 cand.penalty = SCORE_DESPERATE; 210 cand.hyphenEdit = 0; 211#if VERBOSE_DEBUG 212 ALOGD("desperate cand: %d %g:%g", 213 mCandidates.size(), cand.postBreak, cand.preBreak); 214#endif 215 addCandidate(cand); 216 width += w; 217 } 218 } 219 } 220 221 cand.offset = offset; 222 cand.preBreak = preBreak; 223 cand.postBreak = postBreak; 224 cand.penalty = penalty; 225 cand.hyphenEdit = hyph; 226#if VERBOSE_DEBUG 227 ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); 228#endif 229 addCandidate(cand); 230} 231 232// TODO: for justified text, refine with shrink/stretch 233float LineBreaker::computeScore(float delta, bool atEnd) { 234 if (delta < 0) { 235 return SCORE_OVERFULL; 236 } else if (atEnd && mStrategy != kBreakStrategy_Balanced) { 237 return 0.0; 238 } else { 239 return delta * delta; 240 } 241} 242 243// TODO performance: could avoid populating mCandidates if greedy only 244void LineBreaker::addCandidate(Candidate cand) { 245 size_t candIndex = mCandidates.size(); 246 mCandidates.push_back(cand); 247 if (cand.postBreak - mPreBreak > currentLineWidth()) { 248 // This break would create an overfull line, pick the best break and break there (greedy) 249 if (mBestBreak == mLastBreak) { 250 mBestBreak = candIndex; 251 } 252 pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak, 253 mCandidates[mBestBreak].hyphenEdit); 254 mBestScore = SCORE_INFTY; 255#if VERBOSE_DEBUG 256 ALOGD("break: %d %g", mBreaks.back(), mWidths.back()); 257#endif 258 mLastBreak = mBestBreak; 259 mPreBreak = mCandidates[mBestBreak].preBreak; 260 } 261 if (cand.penalty <= mBestScore) { 262 mBestBreak = candIndex; 263 mBestScore = cand.penalty; 264 } 265} 266 267void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) { 268 mBreaks.push_back(offset); 269 mWidths.push_back(width); 270 int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift; 271 flags |= hyph; 272 mFlags.push_back(flags); 273 mFirstTabIndex = INT_MAX; 274} 275 276void LineBreaker::addReplacement(size_t start, size_t end, float width) { 277 mCharWidths[start] = width; 278 std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); 279 addStyleRun(nullptr, nullptr, FontStyle(), start, end, false); 280} 281 282float LineBreaker::currentLineWidth() const { 283 return mLineWidths.getLineWidth(mBreaks.size()); 284} 285 286void LineBreaker::computeBreaksGreedy() { 287 // All breaks but the last have been added in addCandidate already. 288 size_t nCand = mCandidates.size(); 289 if (nCand == 1 || mLastBreak != nCand - 1) { 290 pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0); 291 // don't need to update mBestScore, because we're done 292#if VERBOSE_DEBUG 293 ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); 294#endif 295 } 296} 297 298// Follow "prev" links in mCandidates array, and copy to result arrays. 299void LineBreaker::finishBreaksOptimal() { 300 // clear existing greedy break result 301 mBreaks.clear(); 302 mWidths.clear(); 303 mFlags.clear(); 304 size_t nCand = mCandidates.size(); 305 size_t prev; 306 for (size_t i = nCand - 1; i > 0; i = prev) { 307 prev = mCandidates[i].prev; 308 mBreaks.push_back(mCandidates[i].offset); 309 mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); 310 mFlags.push_back(mCandidates[i].hyphenEdit); 311 } 312 std::reverse(mBreaks.begin(), mBreaks.end()); 313 std::reverse(mWidths.begin(), mWidths.end()); 314 std::reverse(mFlags.begin(), mFlags.end()); 315} 316 317void LineBreaker::computeBreaksOptimal() { 318 size_t active = 0; 319 size_t nCand = mCandidates.size(); 320 for (size_t i = 1; i < nCand; i++) { 321 bool atEnd = i == nCand - 1; 322 float best = SCORE_INFTY; 323 size_t bestPrev = 0; 324 325 size_t lineNumberLast = mCandidates[active].lineNumber; 326 float width = mLineWidths.getLineWidth(lineNumberLast); 327 ParaWidth leftEdge = mCandidates[i].postBreak - width; 328 float bestHope = 0; 329 330 for (size_t j = active; j < i; j++) { 331 size_t lineNumber = mCandidates[j].lineNumber; 332 if (lineNumber != lineNumberLast) { 333 float widthNew = mLineWidths.getLineWidth(lineNumber); 334 if (widthNew != width) { 335 leftEdge = mCandidates[i].postBreak - width; 336 bestHope = 0; 337 width = widthNew; 338 } 339 lineNumberLast = lineNumber; 340 } 341 float jScore = mCandidates[j].score; 342 if (jScore + bestHope >= best) continue; 343 float delta = mCandidates[j].preBreak - leftEdge; 344 345 float widthScore = computeScore(delta, atEnd); 346 if (delta < 0) { 347 active = j + 1; 348 } else { 349 bestHope = widthScore; 350 } 351 352 float score = jScore + widthScore; 353 if (score <= best) { 354 best = score; 355 bestPrev = j; 356 } 357 } 358 mCandidates[i].score = best + mCandidates[i].penalty; 359 mCandidates[i].prev = bestPrev; 360 mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1; 361 } 362 finishBreaksOptimal(); 363} 364 365void LineBreaker::computeBreaksOptimalRect() { 366 size_t active = 0; 367 size_t nCand = mCandidates.size(); 368 float width = mLineWidths.getLineWidth(0); 369 for (size_t i = 1; i < nCand; i++) { 370 bool atEnd = i == nCand - 1; 371 float best = SCORE_INFTY; 372 size_t bestPrev = 0; 373 374 // Width-based component of score increases as line gets shorter, so score will always be 375 // at least this. 376 float bestHope = 0; 377 378 ParaWidth leftEdge = mCandidates[i].postBreak - width; 379 for (size_t j = active; j < i; j++) { 380 // TODO performance: can break if bestHope >= best; worth it? 381 float jScore = mCandidates[j].score; 382 if (jScore + bestHope >= best) continue; 383 float delta = mCandidates[j].preBreak - leftEdge; 384 385 float widthScore = computeScore(delta, atEnd); 386 if (delta < 0) { 387 active = j + 1; 388 } else { 389 bestHope = widthScore; 390 } 391 392 float score = jScore + widthScore; 393 if (score <= best) { 394 best = score; 395 bestPrev = j; 396 } 397 } 398 mCandidates[i].score = best + mCandidates[i].penalty; 399 mCandidates[i].prev = bestPrev; 400 } 401 finishBreaksOptimal(); 402} 403 404size_t LineBreaker::computeBreaks() { 405 if (mStrategy == kBreakStrategy_Greedy) { 406 computeBreaksGreedy(); 407 } else if (mLineWidths.isConstant()) { 408 computeBreaksOptimalRect(); 409 } else { 410 computeBreaksOptimal(); 411 } 412 return mBreaks.size(); 413} 414 415void LineBreaker::finish() { 416 mWidth = 0; 417 mCandidates.clear(); 418 mBreaks.clear(); 419 mWidths.clear(); 420 mFlags.clear(); 421 if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { 422 mTextBuf.clear(); 423 mTextBuf.shrink_to_fit(); 424 mCharWidths.clear(); 425 mCharWidths.shrink_to_fit(); 426 mHyphBuf.clear(); 427 mHyphBuf.shrink_to_fit(); 428 mCandidates.shrink_to_fit(); 429 mBreaks.shrink_to_fit(); 430 mWidths.shrink_to_fit(); 431 mFlags.shrink_to_fit(); 432 } 433 mStrategy = kBreakStrategy_Greedy; 434 mHyphenationFrequency = kHyphenationFrequency_Normal; 435} 436 437} // namespace android 438