LineBreaker.cpp revision 0b25d5ac85533f64764a0d53d5e5d33b46b715fa
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define VERBOSE_DEBUG 0 18 19#include <limits> 20 21#define LOG_TAG "Minikin" 22#include <cutils/log.h> 23 24#include <minikin/Layout.h> 25#include <minikin/LineBreaker.h> 26 27using std::vector; 28 29namespace android { 30 31const int CHAR_TAB = 0x0009; 32 33// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these 34// constants are larger than any reasonable actual width score. 35const float SCORE_INFTY = std::numeric_limits<float>::max(); 36const float SCORE_OVERFULL = 1e12f; 37const float SCORE_DESPERATE = 1e10f; 38 39// When the text buffer is within this limit, capacity of vectors is retained at finish(), 40// to avoid allocation. 41const size_t MAX_TEXT_BUF_RETAIN = 32678; 42 43void LineBreaker::setText() { 44 UErrorCode status = U_ZERO_ERROR; 45 utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status); 46 mBreakIterator->setText(&mUText, status); 47 mBreakIterator->first(); 48 49 // handle initial break here because addStyleRun may never be called 50 mBreakIterator->next(); 51 mCandidates.clear(); 52 Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0}; 53 mCandidates.push_back(cand); 54 55 // reset greedy breaker state 56 mBreaks.clear(); 57 mWidths.clear(); 58 mFlags.clear(); 59 mLastBreak = 0; 60 mBestBreak = 0; 61 mBestScore = SCORE_INFTY; 62 mPreBreak = 0; 63 mFirstTabIndex = INT_MAX; 64} 65 66void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 67 ALOGD("width %f", firstWidth); 68 mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); 69} 70 71// This function determines whether a character is a space that disappears at end of line. 72// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]] 73// Note: all such characters are in the BMP, so it's ok to use code units for this. 74static bool isLineEndSpace(uint16_t c) { 75 return c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || c == 0x205F || 76 c == 0x3000; 77} 78 79// Ordinarily, this method measures the text in the range given. However, when paint 80// is nullptr, it assumes the widths have already been calculated and stored in the 81// width buffer. 82// This method finds the candidate word breaks (using the ICU break iterator) and sends them 83// to addCandidate. 84float LineBreaker::addStyleRun(const MinikinPaint* paint, const FontCollection* typeface, 85 FontStyle style, size_t start, size_t end, bool isRtl) { 86 Layout layout; // performance TODO: move layout to self object to reduce allocation cost? 87 float width = 0.0f; 88 int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR; 89 90 if (paint != nullptr) { 91 layout.setFontCollection(typeface); 92 layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style, 93 *paint); 94 layout.getAdvances(mCharWidths.data() + start); 95 width = layout.getAdvance(); 96 } 97 98 ParaWidth postBreak = mWidth; 99 size_t current = (size_t)mBreakIterator->current(); 100 for (size_t i = start; i < end; i++) { 101 uint16_t c = mTextBuf[i]; 102 if (c == CHAR_TAB) { 103 mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); 104 if (mFirstTabIndex == INT_MAX) { 105 mFirstTabIndex = (int)i; 106 } 107 // fall back to greedy; other modes don't know how to deal with tabs 108 mStrategy = kBreakStrategy_Greedy; 109 } else { 110 mWidth += mCharWidths[i]; 111 if (!isLineEndSpace(c)) { 112 postBreak = mWidth; 113 } 114 } 115 if (i + 1 == current) { 116 // TODO: hyphenation goes here 117 118 // Skip break for zero-width characters. 119 if (current == mTextBuf.size() || mCharWidths[current] > 0) { 120 addWordBreak(current, mWidth, postBreak, 0); 121 } 122 current = (size_t)mBreakIterator->next(); 123 } 124 } 125 126 return width; 127} 128 129// add a word break (possibly for a hyphenated fragment), and add desperate breaks if 130// needed (ie when word exceeds current line width) 131void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, 132 float penalty) { 133 Candidate cand; 134 ParaWidth width = mCandidates.back().preBreak; 135 if (postBreak - width > currentLineWidth()) { 136 // Add desperate breaks. 137 // Note: these breaks are based on the shaping of the (non-broken) original text; they 138 // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping. 139 size_t i = mCandidates.back().offset; 140 width += mCharWidths[i++]; 141 for (; i < offset; i++) { 142 float w = mCharWidths[i]; 143 if (w > 0) { 144 cand.offset = i; 145 cand.preBreak = width; 146 cand.postBreak = width; 147 cand.penalty = SCORE_DESPERATE; 148#if VERBOSE_DEBUG 149 ALOGD("desperate cand: %d %g:%g", 150 mCandidates.size(), cand.postBreak, cand.preBreak); 151#endif 152 addCandidate(cand); 153 width += w; 154 } 155 } 156 } 157 158 cand.offset = offset; 159 cand.preBreak = preBreak; 160 cand.postBreak = postBreak; 161 cand.penalty = penalty; 162#if VERBOSE_DEBUG 163 ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); 164#endif 165 addCandidate(cand); 166} 167 168// TODO performance: could avoid populating mCandidates if greedy only 169void LineBreaker::addCandidate(Candidate cand) { 170 size_t candIndex = mCandidates.size(); 171 mCandidates.push_back(cand); 172 if (cand.postBreak - mPreBreak > currentLineWidth()) { 173 // This break would create an overfull line, pick the best break and break there (greedy) 174 if (mBestBreak == mLastBreak) { 175 mBestBreak = candIndex; 176 } 177 mBreaks.push_back(mCandidates[mBestBreak].offset); 178 mWidths.push_back(mCandidates[mBestBreak].postBreak - mPreBreak); 179 mFlags.push_back(mFirstTabIndex < mBreaks.back()); 180 mFirstTabIndex = INT_MAX; 181 mBestScore = SCORE_INFTY; 182#if VERBOSE_DEBUG 183 ALOGD("break: %d %g", mBreaks.back(), mWidths.back()); 184#endif 185 mLastBreak = mBestBreak; 186 mPreBreak = mCandidates[mBestBreak].preBreak; 187 } 188 if (cand.penalty <= mBestScore) { 189 mBestBreak = candIndex; 190 mBestScore = cand.penalty; 191 } 192} 193 194void LineBreaker::addReplacement(size_t start, size_t end, float width) { 195 mCharWidths[start] = width; 196 std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); 197 addStyleRun(nullptr, nullptr, FontStyle(), start, end, false); 198} 199 200float LineBreaker::currentLineWidth() const { 201 return mLineWidths.getLineWidth(mBreaks.size()); 202} 203 204void LineBreaker::computeBreaksGreedy() { 205 // All breaks but the last have been added in addCandidate already. 206 size_t nCand = mCandidates.size(); 207 if (nCand == 1 || mLastBreak != nCand - 1) { 208 mBreaks.push_back(mCandidates[nCand - 1].offset); 209 mWidths.push_back(mCandidates[nCand - 1].postBreak - mPreBreak); 210 mFlags.push_back(mFirstTabIndex < mBreaks.back()); 211 // don't need to update mFirstTabIndex or mBestScore, because we're done 212#if VERBOSE_DEBUG 213 ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); 214#endif 215 } 216} 217 218void LineBreaker::computeBreaksOpt() { 219 // clear existing greedy break result 220 mBreaks.clear(); 221 mWidths.clear(); 222 mFlags.clear(); 223 size_t active = 0; 224 size_t nCand = mCandidates.size(); 225 float width = mLineWidths.getLineWidth(0); 226 // TODO: actually support non-constant width 227 for (size_t i = 1; i < nCand; i++) { 228 bool stretchIsFree = mStrategy != kBreakStrategy_Balanced && i == nCand - 1; 229 float best = SCORE_INFTY; 230 size_t bestPrev = 0; 231 232 // Width-based component of score increases as line gets shorter, so score will always be 233 // at least this. 234 float bestHope = 0; 235 236 ParaWidth leftEdge = mCandidates[i].postBreak - width; 237 for (size_t j = active; j < i; j++) { 238 float jScore = mCandidates[j].score; 239 if (jScore + bestHope >= best) continue; 240 float delta = mCandidates[j].preBreak - leftEdge; 241 242 // TODO: for justified text, refine with shrink/stretch 243 float widthScore; 244 if (delta < 0) { 245 widthScore = SCORE_OVERFULL; 246 active = j + 1; 247 } else { 248 widthScore = stretchIsFree ? 0 : delta * delta; 249 bestHope = widthScore; 250 } 251 252 float score = jScore + widthScore; 253 if (score <= best) { 254 best = score; 255 bestPrev = j; 256 } 257 } 258 mCandidates[i].score = best + mCandidates[i].penalty; 259 mCandidates[i].prev = bestPrev; 260 } 261 size_t prev; 262 for (size_t i = nCand - 1; i > 0; i = prev) { 263 prev = mCandidates[i].prev; 264 mBreaks.push_back(mCandidates[i].offset); 265 mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); 266 mFlags.push_back(0); 267 } 268 std::reverse(mBreaks.begin(), mBreaks.end()); 269 std::reverse(mWidths.begin(), mWidths.end()); 270 std::reverse(mFlags.begin(), mFlags.end()); 271} 272 273size_t LineBreaker::computeBreaks() { 274 if (mStrategy == kBreakStrategy_Greedy) { 275 computeBreaksGreedy(); 276 } else { 277 computeBreaksOpt(); 278 } 279 return mBreaks.size(); 280} 281 282void LineBreaker::finish() { 283 mWidth = 0; 284 mCandidates.clear(); 285 mBreaks.clear(); 286 mWidths.clear(); 287 mFlags.clear(); 288 if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { 289 mTextBuf.clear(); 290 mTextBuf.shrink_to_fit(); 291 mCharWidths.clear(); 292 mCharWidths.shrink_to_fit(); 293 mCandidates.shrink_to_fit(); 294 mBreaks.shrink_to_fit(); 295 mWidths.shrink_to_fit(); 296 mFlags.shrink_to_fit(); 297 } 298 mStrategy = kBreakStrategy_Greedy; 299} 300 301} // namespace android 302