LineBreaker.h revision 57b6dae9894b9362ef04517ff477fd491f9d433b
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/** 18 * A module for breaking paragraphs into lines, supporting high quality 19 * hyphenation and justification. 20 */ 21 22#ifndef MINIKIN_LINE_BREAKER_H 23#define MINIKIN_LINE_BREAKER_H 24 25#include "unicode/brkiter.h" 26#include "unicode/locid.h" 27#include <cmath> 28#include <vector> 29#include "minikin/Hyphenator.h" 30#include "minikin/WordBreaker.h" 31 32namespace android { 33 34enum BreakStrategy { 35 kBreakStrategy_Greedy = 0, 36 kBreakStrategy_HighQuality = 1, 37 kBreakStrategy_Balanced = 2 38}; 39 40enum HyphenationFrequency { 41 kHyphenationFrequency_None = 0, 42 kHyphenationFrequency_Normal = 1, 43 kHyphenationFrequency_Full = 2 44}; 45 46// TODO: want to generalize to be able to handle array of line widths 47class LineWidths { 48 public: 49 void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 50 mFirstWidth = firstWidth; 51 mFirstWidthLineCount = firstWidthLineCount; 52 mRestWidth = restWidth; 53 } 54 void setIndents(const std::vector<float>& indents) { 55 mIndents = indents; 56 } 57 bool isConstant() const { 58 // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen 59 return mRestWidth == mFirstWidth && mIndents.empty(); 60 } 61 float getLineWidth(int line) const { 62 float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth; 63 if (!mIndents.empty()) { 64 if ((size_t)line < mIndents.size()) { 65 width -= mIndents[line]; 66 } else { 67 width -= mIndents.back(); 68 } 69 } 70 return width; 71 } 72 private: 73 float mFirstWidth; 74 int mFirstWidthLineCount; 75 float mRestWidth; 76 std::vector<float> mIndents; 77}; 78 79class TabStops { 80 public: 81 void set(const int* stops, size_t nStops, int tabWidth) { 82 if (stops != nullptr) { 83 mStops.assign(stops, stops + nStops); 84 } else { 85 mStops.clear(); 86 } 87 mTabWidth = tabWidth; 88 } 89 float nextTab(float widthSoFar) const { 90 for (size_t i = 0; i < mStops.size(); i++) { 91 if (mStops[i] > widthSoFar) { 92 return mStops[i]; 93 } 94 } 95 return floor(widthSoFar / mTabWidth + 1) * mTabWidth; 96 } 97 private: 98 std::vector<int> mStops; 99 int mTabWidth; 100}; 101 102class LineBreaker { 103 public: 104 const static int kTab_Shift = 29; // keep synchronized with TAB_MASK in StaticLayout.java 105 106 // Note: Locale persists across multiple invocations (it is not cleaned up by finish()), 107 // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always 108 // be set on the first invocation, but callers are encouraged not to call again unless 109 // locale has actually changed. 110 // That logic could be here but it's better for performance that it's upstream because of 111 // the cost of constructing and comparing the ICU Locale object. 112 // Note: caller is responsible for managing lifetime of hyphenator 113 void setLocale(const icu::Locale& locale, Hyphenator* hyphenator); 114 115 void resize(size_t size) { 116 mTextBuf.resize(size); 117 mCharWidths.resize(size); 118 } 119 120 size_t size() const { 121 return mTextBuf.size(); 122 } 123 124 uint16_t* buffer() { 125 return mTextBuf.data(); 126 } 127 128 float* charWidths() { 129 return mCharWidths.data(); 130 } 131 132 // set text to current contents of buffer 133 void setText(); 134 135 void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth); 136 137 void setIndents(const std::vector<float>& indents); 138 139 void setTabStops(const int* stops, size_t nStops, int tabWidth) { 140 mTabStops.set(stops, nStops, tabWidth); 141 } 142 143 BreakStrategy getStrategy() const { return mStrategy; } 144 145 void setStrategy(BreakStrategy strategy) { mStrategy = strategy; } 146 147 HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; } 148 149 void setHyphenationFrequency(HyphenationFrequency frequency) { 150 mHyphenationFrequency = frequency; 151 } 152 153 // TODO: this class is actually fairly close to being general and not tied to using 154 // Minikin to do the shaping of the strings. The main thing that would need to be changed 155 // is having some kind of callback (or virtual class, or maybe even template), which could 156 // easily be instantiated with Minikin's Layout. Future work for when needed. 157 float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style, 158 size_t start, size_t end, bool isRtl); 159 160 void addReplacement(size_t start, size_t end, float width); 161 162 size_t computeBreaks(); 163 164 const int* getBreaks() const { 165 return mBreaks.data(); 166 } 167 168 const float* getWidths() const { 169 return mWidths.data(); 170 } 171 172 const int* getFlags() const { 173 return mFlags.data(); 174 } 175 176 void finish(); 177 178 private: 179 // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for 180 // very large paragraphs, accuracy could degrade using only 32-bit float. Note however 181 // that float is used extensively on the Java side for this. This is a typedef so that 182 // we can easily change it based on performance/accuracy tradeoff. 183 typedef double ParaWidth; 184 185 // A single candidate break 186 struct Candidate { 187 size_t offset; // offset to text buffer, in code units 188 size_t prev; // index to previous break 189 ParaWidth preBreak; 190 ParaWidth postBreak; 191 float penalty; // penalty of this break (for example, hyphen penalty) 192 float score; // best score found for this break 193 size_t lineNumber; // only updated for non-constant line widths 194 uint8_t hyphenEdit; 195 }; 196 197 float currentLineWidth() const; 198 199 void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty, 200 uint8_t hyph); 201 202 void addCandidate(Candidate cand); 203 204 // push an actual break to the output. Takes care of setting flags for tab 205 void pushBreak(int offset, float width, uint8_t hyph); 206 207 void computeBreaksGreedy(); 208 209 void computeBreaksOptimal(bool isRectangular); 210 211 void finishBreaksOptimal(); 212 213 WordBreaker mWordBreaker; 214 std::vector<uint16_t>mTextBuf; 215 std::vector<float>mCharWidths; 216 217 Hyphenator* mHyphenator; 218 std::vector<uint8_t> mHyphBuf; 219 220 // layout parameters 221 BreakStrategy mStrategy = kBreakStrategy_Greedy; 222 HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal; 223 LineWidths mLineWidths; 224 TabStops mTabStops; 225 226 // result of line breaking 227 std::vector<int> mBreaks; 228 std::vector<float> mWidths; 229 std::vector<int> mFlags; 230 231 ParaWidth mWidth = 0; 232 std::vector<Candidate> mCandidates; 233 float mLinePenalty = 0.0f; 234 235 // the following are state for greedy breaker (updated while adding style runs) 236 size_t mLastBreak; 237 size_t mBestBreak; 238 float mBestScore; 239 ParaWidth mPreBreak; // prebreak of last break 240 int mFirstTabIndex; 241}; 242 243} // namespace android 244 245#endif // MINIKIN_LINE_BREAKER_H 246