1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * A module for breaking paragraphs into lines, supporting high quality
19 * hyphenation and justification.
20 */
21
22#ifndef MINIKIN_LINE_BREAKER_H
23#define MINIKIN_LINE_BREAKER_H
24
25#include <gtest/gtest_prod.h>
26#include "unicode/brkiter.h"
27#include "unicode/locid.h"
28#include <cmath>
29#include <vector>
30#include "minikin/FontCollection.h"
31#include "minikin/Hyphenator.h"
32#include "minikin/MinikinFont.h"
33#include "minikin/WordBreaker.h"
34
35namespace minikin {
36
37enum BreakStrategy {
38    kBreakStrategy_Greedy = 0,
39    kBreakStrategy_HighQuality = 1,
40    kBreakStrategy_Balanced = 2
41};
42
43enum HyphenationFrequency {
44    kHyphenationFrequency_None = 0,
45    kHyphenationFrequency_Normal = 1,
46    kHyphenationFrequency_Full = 2
47};
48
49// TODO: want to generalize to be able to handle array of line widths
50class LineWidths {
51    public:
52        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
53            mFirstWidth = firstWidth;
54            mFirstWidthLineCount = firstWidthLineCount;
55            mRestWidth = restWidth;
56        }
57        void setIndents(const std::vector<float>& indents) {
58            mIndents = indents;
59        }
60        bool isConstant() const {
61            // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
62            return mRestWidth == mFirstWidth && mIndents.empty();
63        }
64        float getLineWidth(int line) const {
65            float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
66            if (!mIndents.empty()) {
67                if ((size_t)line < mIndents.size()) {
68                    width -= mIndents[line];
69                } else {
70                    width -= mIndents.back();
71                }
72            }
73            return width;
74        }
75        void clear() {
76            mIndents.clear();
77        }
78    private:
79        float mFirstWidth;
80        int mFirstWidthLineCount;
81        float mRestWidth;
82        std::vector<float> mIndents;
83};
84
85class TabStops {
86    public:
87        void set(const int* stops, size_t nStops, int tabWidth) {
88            if (stops != nullptr) {
89                mStops.assign(stops, stops + nStops);
90            } else {
91                mStops.clear();
92            }
93            mTabWidth = tabWidth;
94        }
95        float nextTab(float widthSoFar) const {
96            for (size_t i = 0; i < mStops.size(); i++) {
97                if (mStops[i] > widthSoFar) {
98                    return mStops[i];
99                }
100            }
101            return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
102        }
103    private:
104        std::vector<int> mStops;
105        int mTabWidth;
106};
107
108class LineBreaker {
109    public:
110        const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java
111
112        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
113        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
114        // be set on the first invocation, but callers are encouraged not to call again unless
115        // locale has actually changed.
116        // That logic could be here but it's better for performance that it's upstream because of
117        // the cost of constructing and comparing the ICU Locale object.
118        // Note: caller is responsible for managing lifetime of hyphenator
119        void setLocales(const char* locales, const std::vector<Hyphenator*>& hyphenators);
120
121        void resize(size_t size) {
122            mTextBuf.resize(size);
123            mCharWidths.resize(size);
124        }
125
126        size_t size() const {
127            return mTextBuf.size();
128        }
129
130        uint16_t* buffer() {
131            return mTextBuf.data();
132        }
133
134        float* charWidths() {
135            return mCharWidths.data();
136        }
137
138        // set text to current contents of buffer
139        void setText();
140
141        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
142
143        void setIndents(const std::vector<float>& indents);
144
145        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
146            mTabStops.set(stops, nStops, tabWidth);
147        }
148
149        BreakStrategy getStrategy() const { return mStrategy; }
150
151        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
152
153        void setJustified(bool justified) { mJustified = justified; }
154
155        HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; }
156
157        void setHyphenationFrequency(HyphenationFrequency frequency) {
158            mHyphenationFrequency = frequency;
159        }
160
161        // TODO: this class is actually fairly close to being general and not tied to using
162        // Minikin to do the shaping of the strings. The main thing that would need to be changed
163        // is having some kind of callback (or virtual class, or maybe even template), which could
164        // easily be instantiated with Minikin's Layout. Future work for when needed.
165        float addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
166                FontStyle style, size_t start, size_t end, bool isRtl);
167
168        void addReplacement(size_t start, size_t end, float width);
169
170        size_t computeBreaks();
171
172        const int* getBreaks() const {
173            return mBreaks.data();
174        }
175
176        const float* getWidths() const {
177            return mWidths.data();
178        }
179
180        const int* getFlags() const {
181            return mFlags.data();
182        }
183
184        void finish();
185
186    private:
187        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
188        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
189        // that float is used extensively on the Java side for this. This is a typedef so that
190        // we can easily change it based on performance/accuracy tradeoff.
191        typedef double ParaWidth;
192
193        // A single candidate break
194        struct Candidate {
195            size_t offset;  // offset to text buffer, in code units
196            size_t prev;  // index to previous break
197            ParaWidth preBreak;  // width of text until this point, if we decide to not break here
198            ParaWidth postBreak;  // width of text until this point, if we decide to break here
199            float penalty;  // penalty of this break (for example, hyphen penalty)
200            float score;  // best score found for this break
201            size_t lineNumber;  // only updated for non-constant line widths
202            size_t preSpaceCount;  // preceding space count before breaking
203            size_t postSpaceCount;  // preceding space count after breaking
204            HyphenationType hyphenType;
205        };
206
207        float currentLineWidth() const;
208
209        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
210                size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph);
211
212        void addCandidate(Candidate cand);
213        void pushGreedyBreak();
214
215        // push an actual break to the output. Takes care of setting flags for tab
216        void pushBreak(int offset, float width, uint8_t hyphenEdit);
217
218        float getSpaceWidth() const;
219
220        void computeBreaksGreedy();
221
222        void computeBreaksOptimal(bool isRectangular);
223
224        void finishBreaksOptimal();
225
226        WordBreaker mWordBreaker;
227        icu::Locale mLocale;
228        std::vector<uint16_t>mTextBuf;
229        std::vector<float>mCharWidths;
230
231        Hyphenator* mHyphenator;
232        std::vector<HyphenationType> mHyphBuf;
233
234        // layout parameters
235        BreakStrategy mStrategy = kBreakStrategy_Greedy;
236        HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
237        bool mJustified;
238        LineWidths mLineWidths;
239        TabStops mTabStops;
240
241        // result of line breaking
242        std::vector<int> mBreaks;
243        std::vector<float> mWidths;
244        std::vector<int> mFlags;
245
246        ParaWidth mWidth = 0;
247        std::vector<Candidate> mCandidates;
248        float mLinePenalty = 0.0f;
249
250        // the following are state for greedy breaker (updated while adding style runs)
251        size_t mLastBreak;
252        size_t mBestBreak;
253        float mBestScore;
254        ParaWidth mPreBreak;  // prebreak of last break
255        uint32_t mLastHyphenation;  // hyphen edit of last break kept for next line
256        int mFirstTabIndex;
257        size_t mSpaceCount;
258
259        FRIEND_TEST(LineBreakerTest, setLocales);
260};
261
262}  // namespace minikin
263
264#endif  // MINIKIN_LINE_BREAKER_H
265