LineBreaker.h revision 0dc07c0be325b7c12c50729e04c4b2785a673fd7
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * A module for breaking paragraphs into lines, supporting high quality
19 * hyphenation and justification.
20 */
21
22#ifndef MINIKIN_LINE_BREAKER_H
23#define MINIKIN_LINE_BREAKER_H
24
25#include "unicode/brkiter.h"
26#include "unicode/locid.h"
27#include <cmath>
28#include <vector>
29#include "minikin/Hyphenator.h"
30
31namespace android {
32
33enum BreakStrategy {
34    kBreakStrategy_Greedy = 0,
35    kBreakStrategy_HighQuality = 1,
36    kBreakStrategy_Balanced = 2
37};
38
39enum HyphenationFrequency {
40    kHyphenationFrequency_None = 0,
41    kHyphenationFrequency_Normal = 1,
42    kHyphenationFrequency_Full = 2
43};
44
45// TODO: want to generalize to be able to handle array of line widths
46class LineWidths {
47    public:
48        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
49            mFirstWidth = firstWidth;
50            mFirstWidthLineCount = firstWidthLineCount;
51            mRestWidth = restWidth;
52        }
53        void setIndents(const std::vector<float>& indents) {
54            mIndents = indents;
55        }
56        bool isConstant() const {
57            // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
58            return mRestWidth == mFirstWidth && mIndents.empty();
59        }
60        float getLineWidth(int line) const {
61            float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
62            if (!mIndents.empty()) {
63                if ((size_t)line < mIndents.size()) {
64                    width -= mIndents[line];
65                } else {
66                    width -= mIndents.back();
67                }
68            }
69            return width;
70        }
71    private:
72        float mFirstWidth;
73        int mFirstWidthLineCount;
74        float mRestWidth;
75        std::vector<float> mIndents;
76};
77
78class TabStops {
79    public:
80        void set(const int* stops, size_t nStops, int tabWidth) {
81            if (stops != nullptr) {
82                mStops.assign(stops, stops + nStops);
83            } else {
84                mStops.clear();
85            }
86            mTabWidth = tabWidth;
87        }
88        float nextTab(float widthSoFar) const {
89            for (size_t i = 0; i < mStops.size(); i++) {
90                if (mStops[i] > widthSoFar) {
91                    return mStops[i];
92                }
93            }
94            return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
95        }
96    private:
97        std::vector<int> mStops;
98        int mTabWidth;
99};
100
101class LineBreaker {
102    public:
103        const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java
104
105        ~LineBreaker() {
106            utext_close(&mUText);
107            delete mBreakIterator;
108        }
109
110        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
111        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
112        // be set on the first invocation, but callers are encouraged not to call again unless
113        // locale has actually changed.
114        // That logic could be here but it's better for performance that it's upstream because of
115        // the cost of constructing and comparing the ICU Locale object.
116        // Note: caller is responsible for managing lifetime of hyphenator
117        void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
118
119        void resize(size_t size) {
120            mTextBuf.resize(size);
121            mCharWidths.resize(size);
122        }
123
124        size_t size() const {
125            return mTextBuf.size();
126        }
127
128        uint16_t* buffer() {
129            return mTextBuf.data();
130        }
131
132        float* charWidths() {
133            return mCharWidths.data();
134        }
135
136        // set text to current contents of buffer
137        void setText();
138
139        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
140
141        void setIndents(const std::vector<float>& indents);
142
143        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
144            mTabStops.set(stops, nStops, tabWidth);
145        }
146
147        BreakStrategy getStrategy() const { return mStrategy; }
148
149        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
150
151        HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; }
152
153        void setHyphenationFrequency(HyphenationFrequency frequency) {
154            mHyphenationFrequency = frequency;
155        }
156
157        // TODO: this class is actually fairly close to being general and not tied to using
158        // Minikin to do the shaping of the strings. The main thing that would need to be changed
159        // is having some kind of callback (or virtual class, or maybe even template), which could
160        // easily be instantiated with Minikin's Layout. Future work for when needed.
161        float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
162                size_t start, size_t end, bool isRtl);
163
164        void addReplacement(size_t start, size_t end, float width);
165
166        size_t computeBreaks();
167
168        const int* getBreaks() const {
169            return mBreaks.data();
170        }
171
172        const float* getWidths() const {
173            return mWidths.data();
174        }
175
176        const int* getFlags() const {
177            return mFlags.data();
178        }
179
180        void finish();
181
182    private:
183        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
184        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
185        // that float is used extensively on the Java side for this. This is a typedef so that
186        // we can easily change it based on performance/accuracy tradeoff.
187        typedef double ParaWidth;
188
189        // A single candidate break
190        struct Candidate {
191            size_t offset;  // offset to text buffer, in code units
192            size_t prev;  // index to previous break
193            ParaWidth preBreak;
194            ParaWidth postBreak;
195            float penalty;  // penalty of this break (for example, hyphen penalty)
196            float score;  // best score found for this break
197            size_t lineNumber;  // only updated for non-constant line widths
198            uint8_t hyphenEdit;
199        };
200
201        float currentLineWidth() const;
202
203        // compute shrink/stretch penalty for line
204        float computeScore(float delta, bool atEnd);
205
206        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
207                uint8_t hyph);
208
209        void addCandidate(Candidate cand);
210
211        // push an actual break to the output. Takes care of setting flags for tab
212        void pushBreak(int offset, float width, uint8_t hyph);
213
214        void computeBreaksGreedy();
215
216        void computeBreaksOptimal();
217
218        // special case when LineWidth is constant (layout is rectangle)
219        void computeBreaksOptimalRect();
220
221        void finishBreaksOptimal();
222
223        icu::BreakIterator* mBreakIterator = nullptr;
224        UText mUText = UTEXT_INITIALIZER;
225        std::vector<uint16_t>mTextBuf;
226        std::vector<float>mCharWidths;
227
228        Hyphenator* mHyphenator;
229        std::vector<uint8_t> mHyphBuf;
230
231        // layout parameters
232        BreakStrategy mStrategy = kBreakStrategy_Greedy;
233        HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
234        LineWidths mLineWidths;
235        TabStops mTabStops;
236
237        // result of line breaking
238        std::vector<int> mBreaks;
239        std::vector<float> mWidths;
240        std::vector<int> mFlags;
241
242        ParaWidth mWidth = 0;
243        std::vector<Candidate> mCandidates;
244
245        // the following are state for greedy breaker (updated while adding style runs)
246        size_t mLastBreak;
247        size_t mBestBreak;
248        float mBestScore;
249        ParaWidth mPreBreak;  // prebreak of last break
250        int mFirstTabIndex;
251};
252
253}  // namespace android
254
255#endif  // MINIKIN_LINE_BREAKER_H
256