// LineBreaker.h revision 57b6dae9894b9362ef04517ff477fd491f9d433b
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
/**
 * A module for breaking paragraphs into lines, supporting high-quality
 * hyphenation and justification.
 */
21
22#ifndef MINIKIN_LINE_BREAKER_H
23#define MINIKIN_LINE_BREAKER_H
24
25#include "unicode/brkiter.h"
26#include "unicode/locid.h"
27#include <cmath>
28#include <vector>
29#include "minikin/Hyphenator.h"
30#include "minikin/WordBreaker.h"
31
32namespace android {
33
// Selects the line-breaking strategy; stored and returned by
// LineBreaker::setStrategy() / LineBreaker::getStrategy().
// NOTE(review): the explicit numeric values presumably mirror constants defined
// outside this header -- confirm before renumbering.
enum BreakStrategy {
    kBreakStrategy_Greedy      = 0,
    kBreakStrategy_HighQuality = 1,
    kBreakStrategy_Balanced    = 2,
};
39
// Selects how frequently hyphenation is applied; stored and returned by
// LineBreaker::setHyphenationFrequency() / LineBreaker::getHyphenationFrequency().
// NOTE(review): the explicit numeric values presumably mirror constants defined
// outside this header -- confirm before renumbering.
enum HyphenationFrequency {
    kHyphenationFrequency_None   = 0,
    kHyphenationFrequency_Normal = 1,
    kHyphenationFrequency_Full   = 2,
};
45
// TODO: want to generalize to be able to handle array of line widths
/**
 * Available width for each line of a paragraph.
 *
 * Lines with index below mFirstWidthLineCount use mFirstWidth; every other line uses
 * mRestWidth. An optional list of per-line indents is subtracted from that width; lines past
 * the end of the list reuse the last indent.
 *
 * A default-constructed object has all widths set to zero and no indents, so it can be queried
 * safely before setWidths() is called.
 */
class LineWidths {
    public:
        // Sets the width used for the first firstWidthLineCount lines and the width used for
        // all following lines.
        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
            mFirstWidth = firstWidth;
            mFirstWidthLineCount = firstWidthLineCount;
            mRestWidth = restWidth;
        }

        // Replaces the per-line indents with a copy of the given vector.
        void setIndents(const std::vector<float>& indents) {
            mIndents = indents;
        }

        // Returns true when every line has the same width: both widths are equal and there are
        // no indents.
        bool isConstant() const {
            // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
            return mRestWidth == mFirstWidth && mIndents.empty();
        }

        // Returns the width available to the given (zero-based) line, after subtracting the
        // indent that applies to it.
        float getLineWidth(int line) const {
            float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
            if (!mIndents.empty()) {
                // Lines beyond the indent list share the final indent.
                const size_t index = static_cast<size_t>(line);
                width -= (index < mIndents.size()) ? mIndents[index] : mIndents.back();
            }
            return width;
        }

    private:
        // Initialized in-class so that queries made before setWidths() read defined values.
        float mFirstWidth = 0.0f;
        int mFirstWidthLineCount = 0;
        float mRestWidth = 0.0f;
        std::vector<float> mIndents;
};
78
/**
 * Tab stop positions for a paragraph: an explicit list of stops plus a regular grid of
 * mTabWidth-wide stops that is used once the explicit list has no stop past the current width.
 *
 * A default-constructed object has no explicit stops and a tab width of zero.
 */
class TabStops {
    public:
        // Replaces the explicit stops with the nStops values at `stops` (or clears them when
        // `stops` is null) and sets the width of the regular tab grid.
        void set(const int* stops, size_t nStops, int tabWidth) {
            if (stops != nullptr) {
                mStops.assign(stops, stops + nStops);
            } else {
                mStops.clear();
            }
            mTabWidth = tabWidth;
        }

        // Returns the position of the next tab stop strictly after widthSoFar: the first
        // explicit stop greater than widthSoFar if there is one, otherwise the next multiple of
        // mTabWidth. When the tab width is zero there is no grid to snap to, so widthSoFar is
        // returned unchanged rather than dividing by zero.
        float nextTab(float widthSoFar) const {
            for (const int stop : mStops) {
                if (stop > widthSoFar) {
                    return static_cast<float>(stop);
                }
            }
            if (mTabWidth == 0) {
                return widthSoFar;
            }
            return std::floor(widthSoFar / mTabWidth + 1) * mTabWidth;
        }

    private:
        std::vector<int> mStops;
        // Initialized in-class so that nextTab() is well defined before set() is called.
        int mTabWidth = 0;
};
101
102class LineBreaker {
103    public:
104        const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java
105
106        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
107        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
108        // be set on the first invocation, but callers are encouraged not to call again unless
109        // locale has actually changed.
110        // That logic could be here but it's better for performance that it's upstream because of
111        // the cost of constructing and comparing the ICU Locale object.
112        // Note: caller is responsible for managing lifetime of hyphenator
113        void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
114
115        void resize(size_t size) {
116            mTextBuf.resize(size);
117            mCharWidths.resize(size);
118        }
119
120        size_t size() const {
121            return mTextBuf.size();
122        }
123
124        uint16_t* buffer() {
125            return mTextBuf.data();
126        }
127
128        float* charWidths() {
129            return mCharWidths.data();
130        }
131
132        // set text to current contents of buffer
133        void setText();
134
135        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
136
137        void setIndents(const std::vector<float>& indents);
138
139        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
140            mTabStops.set(stops, nStops, tabWidth);
141        }
142
143        BreakStrategy getStrategy() const { return mStrategy; }
144
145        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
146
147        HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; }
148
149        void setHyphenationFrequency(HyphenationFrequency frequency) {
150            mHyphenationFrequency = frequency;
151        }
152
153        // TODO: this class is actually fairly close to being general and not tied to using
154        // Minikin to do the shaping of the strings. The main thing that would need to be changed
155        // is having some kind of callback (or virtual class, or maybe even template), which could
156        // easily be instantiated with Minikin's Layout. Future work for when needed.
157        float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
158                size_t start, size_t end, bool isRtl);
159
160        void addReplacement(size_t start, size_t end, float width);
161
162        size_t computeBreaks();
163
164        const int* getBreaks() const {
165            return mBreaks.data();
166        }
167
168        const float* getWidths() const {
169            return mWidths.data();
170        }
171
172        const int* getFlags() const {
173            return mFlags.data();
174        }
175
176        void finish();
177
178    private:
179        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
180        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
181        // that float is used extensively on the Java side for this. This is a typedef so that
182        // we can easily change it based on performance/accuracy tradeoff.
183        typedef double ParaWidth;
184
185        // A single candidate break
186        struct Candidate {
187            size_t offset;  // offset to text buffer, in code units
188            size_t prev;  // index to previous break
189            ParaWidth preBreak;
190            ParaWidth postBreak;
191            float penalty;  // penalty of this break (for example, hyphen penalty)
192            float score;  // best score found for this break
193            size_t lineNumber;  // only updated for non-constant line widths
194            uint8_t hyphenEdit;
195        };
196
197        float currentLineWidth() const;
198
199        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
200                uint8_t hyph);
201
202        void addCandidate(Candidate cand);
203
204        // push an actual break to the output. Takes care of setting flags for tab
205        void pushBreak(int offset, float width, uint8_t hyph);
206
207        void computeBreaksGreedy();
208
209        void computeBreaksOptimal(bool isRectangular);
210
211        void finishBreaksOptimal();
212
213        WordBreaker mWordBreaker;
214        std::vector<uint16_t>mTextBuf;
215        std::vector<float>mCharWidths;
216
217        Hyphenator* mHyphenator;
218        std::vector<uint8_t> mHyphBuf;
219
220        // layout parameters
221        BreakStrategy mStrategy = kBreakStrategy_Greedy;
222        HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
223        LineWidths mLineWidths;
224        TabStops mTabStops;
225
226        // result of line breaking
227        std::vector<int> mBreaks;
228        std::vector<float> mWidths;
229        std::vector<int> mFlags;
230
231        ParaWidth mWidth = 0;
232        std::vector<Candidate> mCandidates;
233        float mLinePenalty = 0.0f;
234
235        // the following are state for greedy breaker (updated while adding style runs)
236        size_t mLastBreak;
237        size_t mBestBreak;
238        float mBestScore;
239        ParaWidth mPreBreak;  // prebreak of last break
240        int mFirstTabIndex;
241};
242
243}  // namespace android
244
245#endif  // MINIKIN_LINE_BREAKER_H
246