LineBreaker.h revision dc7bc6e39e1ef6b713b927baf24db8b4f02f1a3f
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * A module for breaking paragraphs into lines, supporting high quality
19 * hyphenation and justification.
20 */
21
22#ifndef MINIKIN_LINE_BREAKER_H
23#define MINIKIN_LINE_BREAKER_H
24
25#include "unicode/brkiter.h"
26#include "unicode/locid.h"
27#include <cmath>
28#include <vector>
29#include "minikin/Hyphenator.h"
30
31namespace android {
32
33enum BreakStrategy {
34    kBreakStrategy_Greedy = 0,
35    kBreakStrategy_HighQuality = 1,
36    kBreakStrategy_Balanced = 2
37};
38
39// TODO: want to generalize to be able to handle array of line widths
40class LineWidths {
41    public:
42        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
43            mFirstWidth = firstWidth;
44            mFirstWidthLineCount = firstWidthLineCount;
45            mRestWidth = restWidth;
46        }
47        void setMargins(const std::vector<float>& margins) {
48            mMargins = margins;
49        }
50        bool isConstant() const {
51            // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
52            return mRestWidth == mFirstWidth && mMargins.empty();
53        }
54        float getLineWidth(int line) const {
55            float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
56            if (!mMargins.empty()) {
57                if ((size_t)line < mMargins.size()) {
58                    width -= mMargins[line];
59                } else {
60                    width -= mMargins.back();
61                }
62            }
63            return width;
64        }
65    private:
66        float mFirstWidth;
67        int mFirstWidthLineCount;
68        float mRestWidth;
69        std::vector<float> mMargins;
70};
71
72class TabStops {
73    public:
74        void set(const int* stops, size_t nStops, int tabWidth) {
75            if (stops != nullptr) {
76                mStops.assign(stops, stops + nStops);
77            } else {
78                mStops.clear();
79            }
80            mTabWidth = tabWidth;
81        }
82        float nextTab(float widthSoFar) const {
83            for (size_t i = 0; i < mStops.size(); i++) {
84                if (mStops[i] > widthSoFar) {
85                    return mStops[i];
86                }
87            }
88            return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
89        }
90    private:
91        std::vector<int> mStops;
92        int mTabWidth;
93};
94
95class LineBreaker {
96    public:
97        const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java
98
99        ~LineBreaker() {
100            utext_close(&mUText);
101            delete mBreakIterator;
102        }
103
104        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
105        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
106        // be set on the first invocation, but callers are encouraged not to call again unless
107        // locale has actually changed.
108        // That logic could be here but it's better for performance that it's upstream because of
109        // the cost of constructing and comparing the ICU Locale object.
110        // Note: caller is responsible for managing lifetime of hyphenator
111        void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
112
113        void resize(size_t size) {
114            mTextBuf.resize(size);
115            mCharWidths.resize(size);
116        }
117
118        size_t size() const {
119            return mTextBuf.size();
120        }
121
122        uint16_t* buffer() {
123            return mTextBuf.data();
124        }
125
126        float* charWidths() {
127            return mCharWidths.data();
128        }
129
130        // set text to current contents of buffer
131        void setText();
132
133        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
134
135        void setMargins(const std::vector<float>& margins);
136
137        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
138            mTabStops.set(stops, nStops, tabWidth);
139        }
140
141        BreakStrategy getStrategy() const { return mStrategy; }
142
143        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
144
145        // TODO: this class is actually fairly close to being general and not tied to using
146        // Minikin to do the shaping of the strings. The main thing that would need to be changed
147        // is having some kind of callback (or virtual class, or maybe even template), which could
148        // easily be instantiated with Minikin's Layout. Future work for when needed.
149        float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
150                size_t start, size_t end, bool isRtl);
151
152        void addReplacement(size_t start, size_t end, float width);
153
154        size_t computeBreaks();
155
156        const int* getBreaks() const {
157            return mBreaks.data();
158        }
159
160        const float* getWidths() const {
161            return mWidths.data();
162        }
163
164        const int* getFlags() const {
165            return mFlags.data();
166        }
167
168        void finish();
169
170    private:
171        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
172        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
173        // that float is used extensively on the Java side for this. This is a typedef so that
174        // we can easily change it based on performance/accuracy tradeoff.
175        typedef double ParaWidth;
176
177        // A single candidate break
178        struct Candidate {
179            size_t offset;  // offset to text buffer, in code units
180            size_t prev;  // index to previous break
181            ParaWidth preBreak;
182            ParaWidth postBreak;
183            float penalty;  // penalty of this break (for example, hyphen penalty)
184            float score;  // best score found for this break
185            size_t lineNumber;  // only updated for non-constant line widths
186            uint8_t hyphenEdit;
187        };
188
189        float currentLineWidth() const;
190
191        // compute shrink/stretch penalty for line
192        float computeScore(float delta, bool atEnd);
193
194        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
195                uint8_t hyph);
196
197        void addCandidate(Candidate cand);
198
199        // push an actual break to the output. Takes care of setting flags for tab
200        void pushBreak(int offset, float width, uint8_t hyph);
201
202        void computeBreaksGreedy();
203
204        void computeBreaksOptimal();
205
206        // special case when LineWidth is constant (layout is rectangle)
207        void computeBreaksOptimalRect();
208
209        void finishBreaksOptimal();
210
211        icu::BreakIterator* mBreakIterator = nullptr;
212        UText mUText = UTEXT_INITIALIZER;
213        std::vector<uint16_t>mTextBuf;
214        std::vector<float>mCharWidths;
215
216        Hyphenator* mHyphenator;
217        std::vector<uint8_t> mHyphBuf;
218
219        // layout parameters
220        BreakStrategy mStrategy = kBreakStrategy_Greedy;
221        LineWidths mLineWidths;
222        TabStops mTabStops;
223
224        // result of line breaking
225        std::vector<int> mBreaks;
226        std::vector<float> mWidths;
227        std::vector<int> mFlags;
228
229        ParaWidth mWidth = 0;
230        std::vector<Candidate> mCandidates;
231
232        // the following are state for greedy breaker (updated while adding style runs)
233        size_t mLastBreak;
234        size_t mBestBreak;
235        float mBestScore;
236        ParaWidth mPreBreak;  // prebreak of last break
237        int mFirstTabIndex;
238};
239
240}  // namespace android
241
242#endif  // MINIKIN_LINE_BREAKER_H
243