LineBreaker.h revision 5cdad92c300a65cab89b172e952186f0c5870657
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * A module for breaking paragraphs into lines, supporting high quality
19 * hyphenation and justification.
20 */
21
22#ifndef MINIKIN_LINE_BREAKER_H
23#define MINIKIN_LINE_BREAKER_H
24
25#include "unicode/brkiter.h"
26#include "unicode/locid.h"
27#include <cmath>
28#include <vector>
29#include "minikin/Hyphenator.h"
30
31namespace android {
32
33enum BreakStrategy {
34    kBreakStrategy_Greedy = 0,
35    kBreakStrategy_HighQuality = 1,
36    kBreakStrategy_Balanced = 2
37};
38
39// TODO: want to generalize to be able to handle array of line widths
40class LineWidths {
41    public:
42        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
43            mFirstWidth = firstWidth;
44            mFirstWidthLineCount = firstWidthLineCount;
45            mRestWidth = restWidth;
46        }
47        bool isConstant() const {
48            // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
49            return mRestWidth == mFirstWidth;
50        }
51        float getLineWidth(int line) const {
52            return (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
53        }
54    private:
55        float mFirstWidth;
56        int mFirstWidthLineCount;
57        float mRestWidth;
58};
59
60class TabStops {
61    public:
62        void set(const int* stops, size_t nStops, int tabWidth) {
63            if (stops != nullptr) {
64                mStops.assign(stops, stops + nStops);
65            } else {
66                mStops.clear();
67            }
68            mTabWidth = tabWidth;
69        }
70        float nextTab(float widthSoFar) const {
71            for (size_t i = 0; i < mStops.size(); i++) {
72                if (mStops[i] > widthSoFar) {
73                    return mStops[i];
74                }
75            }
76            return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
77        }
78    private:
79        std::vector<int> mStops;
80        int mTabWidth;
81};
82
83class LineBreaker {
84    public:
85        const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java
86
87        ~LineBreaker() {
88            utext_close(&mUText);
89            delete mBreakIterator;
90        }
91
92        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
93        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
94        // be set on the first invocation, but callers are encouraged not to call again unless
95        // locale has actually changed.
96        // That logic could be here but it's better for performance that it's upstream because of
97        // the cost of constructing and comparing the ICU Locale object.
98        // Note: caller is responsible for managing lifetime of hyphenator
99        void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
100
101        void resize(size_t size) {
102            mTextBuf.resize(size);
103            mCharWidths.resize(size);
104        }
105
106        size_t size() const {
107            return mTextBuf.size();
108        }
109
110        uint16_t* buffer() {
111            return mTextBuf.data();
112        }
113
114        float* charWidths() {
115            return mCharWidths.data();
116        }
117
118        // set text to current contents of buffer
119        void setText();
120
121        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
122
123        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
124            mTabStops.set(stops, nStops, tabWidth);
125        }
126
127        BreakStrategy getStrategy() const { return mStrategy; }
128
129        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
130
131        // TODO: this class is actually fairly close to being general and not tied to using
132        // Minikin to do the shaping of the strings. The main thing that would need to be changed
133        // is having some kind of callback (or virtual class, or maybe even template), which could
134        // easily be instantiated with Minikin's Layout. Future work for when needed.
135        float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
136                size_t start, size_t end, bool isRtl);
137
138        void addReplacement(size_t start, size_t end, float width);
139
140        size_t computeBreaks();
141
142        const int* getBreaks() const {
143            return mBreaks.data();
144        }
145
146        const float* getWidths() const {
147            return mWidths.data();
148        }
149
150        const int* getFlags() const {
151            return mFlags.data();
152        }
153
154        void finish();
155
156    private:
157        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
158        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
159        // that float is used extensively on the Java side for this. This is a typedef so that
160        // we can easily change it based on performance/accuracy tradeoff.
161        typedef double ParaWidth;
162
163        // A single candidate break
164        struct Candidate {
165            size_t offset;  // offset to text buffer, in code units
166            size_t prev;  // index to previous break
167            ParaWidth preBreak;
168            ParaWidth postBreak;
169            float penalty;  // penalty of this break (for example, hyphen penalty)
170            float score;  // best score found for this break
171            size_t lineNumber;  // only updated for non-constant line widths
172            uint8_t hyphenEdit;
173        };
174
175        float currentLineWidth() const;
176
177        // compute shrink/stretch penalty for line
178        float computeScore(float delta, bool atEnd);
179
180        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
181                uint8_t hyph);
182
183        void addCandidate(Candidate cand);
184
185        // push an actual break to the output. Takes care of setting flags for tab
186        void pushBreak(int offset, float width, uint8_t hyph);
187
188        void computeBreaksGreedy();
189
190        void computeBreaksOptimal();
191
192        // special case when LineWidth is constant (layout is rectangle)
193        void computeBreaksOptimalRect();
194
195        void finishBreaksOptimal();
196
197        icu::BreakIterator* mBreakIterator = nullptr;
198        UText mUText = UTEXT_INITIALIZER;
199        std::vector<uint16_t>mTextBuf;
200        std::vector<float>mCharWidths;
201
202        Hyphenator* mHyphenator;
203        std::vector<uint8_t> mHyphBuf;
204
205        // layout parameters
206        BreakStrategy mStrategy = kBreakStrategy_Greedy;
207        LineWidths mLineWidths;
208        TabStops mTabStops;
209
210        // result of line breaking
211        std::vector<int> mBreaks;
212        std::vector<float> mWidths;
213        std::vector<int> mFlags;
214
215        ParaWidth mWidth = 0;
216        std::vector<Candidate> mCandidates;
217
218        // the following are state for greedy breaker (updated while adding style runs)
219        size_t mLastBreak;
220        size_t mBestBreak;
221        float mBestScore;
222        ParaWidth mPreBreak;  // prebreak of last break
223        int mFirstTabIndex;
224};
225
226}  // namespace android
227
228#endif  // MINIKIN_LINE_BREAKER_H
229