OptimalLineBreaker.cpp revision c9611626465f9e11817854f554e7b7d0c6cee905
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "OptimalLineBreaker.h" 18 19#include <algorithm> 20#include <limits> 21 22#include "minikin/Characters.h" 23#include "minikin/Layout.h" 24#include "minikin/Range.h" 25#include "minikin/U16StringPiece.h" 26 27#include "HyphenatorMap.h" 28#include "LayoutUtils.h" 29#include "LineBreakerUtil.h" 30#include "Locale.h" 31#include "LocaleListCache.h" 32#include "MinikinInternal.h" 33#include "WordBreaker.h" 34 35namespace minikin { 36 37namespace { 38 39// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these 40// constants are larger than any reasonable actual width score. 41constexpr float SCORE_INFTY = std::numeric_limits<float>::max(); 42constexpr float SCORE_OVERFULL = 1e12f; 43constexpr float SCORE_DESPERATE = 1e10f; 44 45// Multiplier for hyphen penalty on last line. 46constexpr float LAST_LINE_PENALTY_MULTIPLIER = 4.0f; 47// Penalty assigned to each line break (to try to minimize number of lines) 48// TODO: when we implement full justification (so spaces can shrink and stretch), this is 49// probably not the most appropriate method. 50constexpr float LINE_PENALTY_MULTIPLIER = 2.0f; 51 52// Penalty assigned to shrinking the whitepsace. 53constexpr float SHRINK_PENALTY_MULTIPLIER = 4.0f; 54 55// Maximum amount that spaces can shrink, in justified text. 56constexpr float SHRINKABILITY = 1.0 / 3.0; 57 58// A single candidate break 59struct Candidate { 60 uint32_t offset; // offset to text buffer, in code units 61 62 ParaWidth preBreak; // width of text until this point, if we decide to not break here: 63 // preBreak is used as an optimized way to calculate the width 64 // between two candidates. The line width between two line break 65 // candidates i and j is calculated as postBreak(j) - preBreak(i). 66 ParaWidth postBreak; // width of text until this point, if we decide to break here 67 float penalty; // penalty of this break (for example, hyphen penalty) 68 uint32_t preSpaceCount; // preceding space count before breaking 69 uint32_t postSpaceCount; // preceding space count after breaking 70 HyphenationType hyphenType; 71 bool isRtl; // The direction of the bidi run containing or ending in this candidate 72 73 Candidate(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty, 74 uint32_t preSpaceCount, uint32_t postSpaceCount, HyphenationType hyphenType, 75 bool isRtl) 76 : offset(offset), 77 preBreak(preBreak), 78 postBreak(postBreak), 79 penalty(penalty), 80 preSpaceCount(preSpaceCount), 81 postSpaceCount(postSpaceCount), 82 hyphenType(hyphenType), 83 isRtl(isRtl) {} 84}; 85 86// A context of line break optimization. 87struct OptimizeContext { 88 // The break candidates. 89 std::vector<Candidate> candidates; 90 91 // The penalty for the number of lines. 92 float linePenalty = 0.0f; 93 94 // The width of a space. May be 0 if there are no spaces. 95 // Note: if there are multiple different widths for spaces (for example, because of mixing of 96 // fonts), it's only guaranteed to pick one. 97 float spaceWidth = 0.0f; 98 99 // Append desperate break point to the candidates. 100 inline void pushDesperate(uint32_t offset, ParaWidth sumOfCharWidths, uint32_t spaceCount, 101 bool isRtl) { 102 candidates.emplace_back(offset, sumOfCharWidths, sumOfCharWidths, SCORE_DESPERATE, 103 spaceCount, spaceCount, 104 HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, isRtl); 105 } 106 107 // Append hyphenation break point to the candidates. 108 inline void pushHyphenation(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak, 109 float penalty, uint32_t spaceCount, HyphenationType type, 110 bool isRtl) { 111 candidates.emplace_back(offset, preBreak, postBreak, penalty, spaceCount, spaceCount, type, 112 isRtl); 113 } 114 115 // Append word break point to the candidates. 116 inline void pushWordBreak(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak, 117 float penalty, uint32_t preSpaceCount, uint32_t postSpaceCount, 118 bool isRtl) { 119 candidates.emplace_back(offset, preBreak, postBreak, penalty, preSpaceCount, postSpaceCount, 120 HyphenationType::DONT_BREAK, isRtl); 121 } 122 123 OptimizeContext() { 124 candidates.emplace_back(0, 0.0f, 0.0f, 0.0f, 0, 0, HyphenationType::DONT_BREAK, false); 125 } 126}; 127 128// Compute the penalty for the run and returns penalty for hyphenation and number of lines. 129std::pair<float, float> computePenalties(const Run& run, const LineWidth& lineWidth, 130 HyphenationFrequency frequency, bool justified) { 131 float linePenalty = 0.0; 132 const MinikinPaint* paint = run.getPaint(); 133 // a heuristic that seems to perform well 134 float hyphenPenalty = 0.5 * paint->size * paint->scaleX * lineWidth.getAt(0); 135 if (frequency == HyphenationFrequency::Normal) { 136 hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing 137 } 138 139 if (justified) { 140 // Make hyphenation more aggressive for fully justified text (so that "normal" in 141 // justified mode is the same as "full" in ragged-right). 142 hyphenPenalty *= 0.25; 143 } else { 144 // Line penalty is zero for justified text. 145 linePenalty = hyphenPenalty * LINE_PENALTY_MULTIPLIER; 146 } 147 148 return std::make_pair(hyphenPenalty, linePenalty); 149} 150 151// Represents a desperate break point. 152struct DesperateBreak { 153 // The break offset. 154 uint32_t offset; 155 156 // The sum of the character width from the beginning of the word. 157 ParaWidth sumOfChars; 158 159 DesperateBreak(uint32_t offset, ParaWidth sumOfChars) 160 : offset(offset), sumOfChars(sumOfChars){}; 161}; 162 163// Retrieves desperate break points from a word. 164std::vector<DesperateBreak> populateDesperatePoints(const MeasuredText& measured, 165 const Range& range) { 166 std::vector<DesperateBreak> out; 167 ParaWidth width = measured.widths[range.getStart()]; 168 for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) { 169 const float w = measured.widths[i]; 170 if (w == 0) { 171 continue; // w == 0 means here is not a grapheme bounds. Don't break here. 172 } 173 out.emplace_back(i, width); 174 width += w; 175 } 176 return out; 177} 178 179// Append hyphenation break points and desperate break points. 180// If an offset is a both candidate for hyphenation and desperate break points, place desperate 181// break candidate first and hyphenation break points second since the result width of the desperate 182// break is shorter than hyphenation break. 183// This is important since DP in computeBreaksOptimal assumes that the result line width is 184// increased by break offset. 185void appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter, 186 std::vector<HyphenBreak>::const_iterator endHyIter, 187 const std::vector<DesperateBreak>& desperates, const CharProcessor& proc, 188 float hyphenPenalty, bool isRtl, OptimizeContext* out) { 189 auto d = desperates.begin(); 190 while (hyIter != endHyIter || d != desperates.end()) { 191 // If both hyphen breaks and desperate breaks point to the same offset, push desperate 192 // breaks first. 193 if (d != desperates.end() && (hyIter == endHyIter || d->offset <= hyIter->offset)) { 194 out->pushDesperate(d->offset, proc.sumOfCharWidthsAtPrevWordBreak + d->sumOfChars, 195 proc.effectiveSpaceCount, isRtl); 196 d++; 197 } else { 198 out->pushHyphenation(hyIter->offset, proc.sumOfCharWidths - hyIter->second, 199 proc.sumOfCharWidthsAtPrevWordBreak + hyIter->first, hyphenPenalty, 200 proc.effectiveSpaceCount, hyIter->type, isRtl); 201 hyIter++; 202 } 203 } 204} 205 206// Enumerate all line break candidates. 207OptimizeContext populateCandidates(const U16StringPiece& textBuf, const MeasuredText& measured, 208 const LineWidth& lineWidth, HyphenationFrequency frequency, 209 bool isJustified) { 210 const ParaWidth minLineWidth = lineWidth.getMin(); 211 CharProcessor proc(textBuf); 212 213 OptimizeContext result; 214 215 const bool doHyphenation = frequency != HyphenationFrequency::None; 216 auto hyIter = std::begin(measured.hyphenBreaks); 217 218 for (const auto& run : measured.runs) { 219 const bool isRtl = run->isRtl(); 220 const Range& range = run->getRange(); 221 222 // Compute penalty parameters. 223 float hyphenPenalty = 0.0f; 224 if (run->canHyphenate()) { 225 auto penalties = computePenalties(*run, lineWidth, frequency, isJustified); 226 hyphenPenalty = penalties.first; 227 result.linePenalty = std::max(penalties.second, result.linePenalty); 228 } 229 230 proc.updateLocaleIfNecessary(*run); 231 232 for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) { 233 MINIKIN_ASSERT(textBuf[i] != CHAR_TAB, "TAB is not supported in optimal line breaker"); 234 proc.feedChar(i, textBuf[i], measured.widths[i]); 235 236 const uint32_t nextCharOffset = i + 1; 237 if (nextCharOffset != proc.nextWordBreak) { 238 continue; // Wait until word break point. 239 } 240 241 // Add hyphenation and desperate break points. 242 std::vector<HyphenBreak> hyphenedBreaks; 243 std::vector<DesperateBreak> desperateBreaks; 244 const Range contextRange = proc.contextRange(); 245 246 auto beginHyIter = hyIter; 247 while (hyIter != std::end(measured.hyphenBreaks) && 248 hyIter->offset < contextRange.getEnd()) { 249 hyIter++; 250 } 251 if (proc.widthFromLastWordBreak() > minLineWidth) { 252 desperateBreaks = populateDesperatePoints(measured, contextRange); 253 } 254 appendWithMerging(beginHyIter, doHyphenation ? hyIter : beginHyIter, desperateBreaks, 255 proc, hyphenPenalty, isRtl, &result); 256 257 // We skip breaks for zero-width characters inside replacement spans. 258 if (nextCharOffset == range.getEnd() || measured.widths[nextCharOffset] > 0) { 259 const float penalty = hyphenPenalty * proc.wordBreakPenalty(); 260 result.pushWordBreak(nextCharOffset, proc.sumOfCharWidths, proc.effectiveWidth, 261 penalty, proc.rawSpaceCount, proc.effectiveSpaceCount, isRtl); 262 } 263 } 264 } 265 result.spaceWidth = proc.spaceWidth; 266 return result; 267} 268 269class LineBreakOptimizer { 270public: 271 LineBreakOptimizer() {} 272 273 LineBreakResult computeBreaks(const OptimizeContext& context, const MeasuredText& measuredText, 274 const LineWidth& lineWidth, BreakStrategy strategy, 275 bool justified); 276 277private: 278 // Data used to compute optimal line breaks 279 struct OptimalBreaksData { 280 float score; // best score found for this break 281 uint32_t prev; // index to previous break 282 uint32_t lineNumber; // the computed line number of the candidate 283 }; 284 LineBreakResult finishBreaksOptimal(const MeasuredText& measured, 285 const std::vector<OptimalBreaksData>& breaksData, 286 const std::vector<Candidate>& candidates); 287 288 MinikinExtent computeMaxExtent(const MeasuredText& measured, uint32_t start, 289 uint32_t end) const; 290}; 291 292// Find the needed extent between the start and end ranges. start is inclusive and end is exclusive. 293// Both are indices of the source string. 294MinikinExtent LineBreakOptimizer::computeMaxExtent(const MeasuredText& measured, uint32_t start, 295 uint32_t end) const { 296 MinikinExtent res = {0.0, 0.0, 0.0}; 297 for (uint32_t j = start; j < end; j++) { 298 res.extendBy(measured.extents[j]); 299 } 300 return res; 301} 302 303// Follow "prev" links in candidates array, and copy to result arrays. 304LineBreakResult LineBreakOptimizer::finishBreaksOptimal( 305 const MeasuredText& measured, const std::vector<OptimalBreaksData>& breaksData, 306 const std::vector<Candidate>& candidates) { 307 LineBreakResult result; 308 const uint32_t nCand = candidates.size(); 309 uint32_t prevIndex; 310 for (uint32_t i = nCand - 1; i > 0; i = prevIndex) { 311 prevIndex = breaksData[i].prev; 312 const Candidate& cand = candidates[i]; 313 const Candidate& prev = candidates[prevIndex]; 314 315 result.breakPoints.push_back(cand.offset); 316 result.widths.push_back(cand.postBreak - prev.preBreak); 317 MinikinExtent extent = computeMaxExtent(measured, prev.offset, cand.offset); 318 result.ascents.push_back(extent.ascent); 319 result.descents.push_back(extent.descent); 320 321 const HyphenEdit edit = 322 packHyphenEdit(editForNextLine(prev.hyphenType), editForThisLine(cand.hyphenType)); 323 result.flags.push_back(static_cast<int>(edit)); 324 } 325 result.reverse(); 326 return result; 327} 328 329LineBreakResult LineBreakOptimizer::computeBreaks(const OptimizeContext& context, 330 const MeasuredText& measured, 331 const LineWidth& lineWidth, 332 BreakStrategy strategy, bool justified) { 333 const std::vector<Candidate>& candidates = context.candidates; 334 uint32_t active = 0; 335 const uint32_t nCand = candidates.size(); 336 const float maxShrink = justified ? SHRINKABILITY * context.spaceWidth : 0.0f; 337 338 std::vector<OptimalBreaksData> breaksData; 339 breaksData.reserve(nCand); 340 breaksData.push_back({0.0, 0, 0}); // The first candidate is always at the first line. 341 342 // "i" iterates through candidates for the end of the line. 343 for (uint32_t i = 1; i < nCand; i++) { 344 const bool atEnd = i == nCand - 1; 345 float best = SCORE_INFTY; 346 uint32_t bestPrev = 0; 347 348 uint32_t lineNumberLast = breaksData[active].lineNumber; 349 float width = lineWidth.getAt(lineNumberLast); 350 351 ParaWidth leftEdge = candidates[i].postBreak - width; 352 float bestHope = 0; 353 354 // "j" iterates through candidates for the beginning of the line. 355 for (uint32_t j = active; j < i; j++) { 356 const uint32_t lineNumber = breaksData[j].lineNumber; 357 if (lineNumber != lineNumberLast) { 358 const float widthNew = lineWidth.getAt(lineNumber); 359 if (widthNew != width) { 360 leftEdge = candidates[i].postBreak - width; 361 bestHope = 0; 362 width = widthNew; 363 } 364 lineNumberLast = lineNumber; 365 } 366 const float jScore = breaksData[j].score; 367 if (jScore + bestHope >= best) continue; 368 const float delta = candidates[j].preBreak - leftEdge; 369 370 // compute width score for line 371 372 // Note: the "bestHope" optimization makes the assumption that, when delta is 373 // non-negative, widthScore will increase monotonically as successive candidate 374 // breaks are considered. 375 float widthScore = 0.0f; 376 float additionalPenalty = 0.0f; 377 if ((atEnd || !justified) && delta < 0) { 378 widthScore = SCORE_OVERFULL; 379 } else if (atEnd && strategy != BreakStrategy::Balanced) { 380 // increase penalty for hyphen on last line 381 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * candidates[j].penalty; 382 } else { 383 widthScore = delta * delta; 384 if (delta < 0) { 385 if (-delta < 386 maxShrink * (candidates[i].postSpaceCount - candidates[j].preSpaceCount)) { 387 widthScore *= SHRINK_PENALTY_MULTIPLIER; 388 } else { 389 widthScore = SCORE_OVERFULL; 390 } 391 } 392 } 393 394 if (delta < 0) { 395 active = j + 1; 396 } else { 397 bestHope = widthScore; 398 } 399 400 const float score = jScore + widthScore + additionalPenalty; 401 if (score <= best) { 402 best = score; 403 bestPrev = j; 404 } 405 } 406 breaksData.push_back({best + candidates[i].penalty + context.linePenalty, // score 407 bestPrev, // prev 408 breaksData[bestPrev].lineNumber + 1}); // lineNumber 409 } 410 return finishBreaksOptimal(measured, breaksData, candidates); 411} 412 413} // namespace 414 415LineBreakResult breakLineOptimal(const U16StringPiece& textBuf, const MeasuredText& measured, 416 const LineWidth& lineWidth, BreakStrategy strategy, 417 HyphenationFrequency frequency, bool justified) { 418 if (textBuf.size() == 0) { 419 return LineBreakResult(); 420 } 421 const OptimizeContext context = 422 populateCandidates(textBuf, measured, lineWidth, frequency, justified); 423 LineBreakOptimizer optimizer; 424 return optimizer.computeBreaks(context, measured, lineWidth, strategy, justified); 425} 426 427} // namespace minikin 428