// LineBreaker.h revision 0b25d5ac85533f64764a0d53d5e5d33b46b715fa
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
/**
 * A module for breaking paragraphs into lines, supporting high quality
 * hyphenation and justification.
 */
21
22#ifndef MINIKIN_LINE_BREAKER_H
23#define MINIKIN_LINE_BREAKER_H
24
25#include "unicode/brkiter.h"
26#include "unicode/locid.h"
27#include <cmath>
28#include <vector>
29
30namespace android {
31
// Identifies which line breaking strategy a LineBreaker should use.
// The numeric values are assigned explicitly; keep them stable, since code
// outside this header may depend on them (assumption - TODO confirm).
enum BreakStrategy {
    // Value 0; LineBreaker starts out with this strategy.
    kBreakStrategy_Greedy = 0,

    kBreakStrategy_HighQuality = 1,

    kBreakStrategy_Balanced = 2,
};
37
// Holds the available width for each line of a paragraph: the first
// mFirstWidthLineCount lines use one width, every later line uses another.
//
// All members start at zero, so getLineWidth() returns 0 (rather than an
// indeterminate value) if it is called before setWidths().
//
// TODO: want to generalize to be able to handle array of line widths
class LineWidths {
    public:
        // Stores the layout widths.
        //   firstWidth          - width applied to the leading lines
        //   firstWidthLineCount - number of leading lines that use firstWidth
        //   restWidth           - width applied to every remaining line
        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
            mFirstWidth = firstWidth;
            mFirstWidthLineCount = firstWidthLineCount;
            mRestWidth = restWidth;
        }

        // Returns the width for the given zero-based line index: mFirstWidth
        // when line < mFirstWidthLineCount, mRestWidth otherwise.
        float getLineWidth(int line) const {
            return (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
        }

    private:
        float mFirstWidth = 0.0f;
        int mFirstWidthLineCount = 0;
        float mRestWidth = 0.0f;
};
54
// Tab stop positions for a paragraph: an optional list of explicit stops,
// followed by evenly spaced stops every mTabWidth units.
class TabStops {
    public:
        // Replaces the current configuration.
        //   stops    - array of nStops explicit stop positions, or nullptr for none
        //   nStops   - number of entries readable through stops
        //   tabWidth - spacing of the implicit stops used past the explicit ones
        void set(const int* stops, size_t nStops, int tabWidth) {
            if (stops != nullptr) {
                mStops.assign(stops, stops + nStops);
            } else {
                mStops.clear();
            }
            mTabWidth = tabWidth;
        }

        // Returns the position of the next tab stop after widthSoFar.
        //
        // The explicit stops are scanned in stored order and the first one
        // strictly greater than widthSoFar wins. If there is none, the result is
        // the next multiple of mTabWidth strictly greater than widthSoFar.
        //
        // When mTabWidth is not positive (including the state before set() is
        // called) there is no valid implicit stop; widthSoFar is returned
        // unchanged instead of dividing by zero (NaN) or moving backwards.
        float nextTab(float widthSoFar) const {
            for (int stop : mStops) {
                if (stop > widthSoFar) {
                    return stop;
                }
            }
            if (mTabWidth <= 0) {
                return widthSoFar;
            }
            // std::floor selects the float overload, avoiding a round trip
            // through double.
            return std::floor(widthSoFar / mTabWidth + 1) * mTabWidth;
        }

    private:
        std::vector<int> mStops;
        int mTabWidth = 0;
};
77
78class LineBreaker {
79    public:
80        ~LineBreaker() {
81            utext_close(&mUText);
82            delete mBreakIterator;
83        }
84
85        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
86        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
87        // be set on the first invocation, but callers are encouraged not to call again unless
88        // locale has actually changed.
89        // That logic could be here but it's better for performance that it's upstream because of
90        // the cost of constructing and comparing the ICU Locale object.
91        void setLocale(const icu::Locale& locale) {
92            delete mBreakIterator;
93            UErrorCode status = U_ZERO_ERROR;
94            mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
95            // TODO: check status
96            // TODO: load hyphenator from locale
97        }
98
99        void resize(size_t size) {
100            mTextBuf.resize(size);
101            mCharWidths.resize(size);
102        }
103
104        size_t size() const {
105            return mTextBuf.size();
106        }
107
108        uint16_t* buffer() {
109            return mTextBuf.data();
110        }
111
112        float* charWidths() {
113            return mCharWidths.data();
114        }
115
116        // set text to current contents of buffer
117        void setText();
118
119        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
120
121        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
122            mTabStops.set(stops, nStops, tabWidth);
123        }
124
125        BreakStrategy getStrategy() const { return mStrategy; }
126
127        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
128
129        // TODO: this class is actually fairly close to being general and not tied to using
130        // Minikin to do the shaping of the strings. The main thing that would need to be changed
131        // is having some kind of callback (or virtual class, or maybe even template), which could
132        // easily be instantiated with Minikin's Layout. Future work for when needed.
133        float addStyleRun(const MinikinPaint* paint, const FontCollection* typeface,
134                FontStyle style, size_t start, size_t end, bool isRtl);
135
136        void addReplacement(size_t start, size_t end, float width);
137
138        size_t computeBreaks();
139
140        const int* getBreaks() const {
141            return mBreaks.data();
142        }
143
144        const float* getWidths() const {
145            return mWidths.data();
146        }
147
148        const uint8_t* getFlags() const {
149            return mFlags.data();
150        }
151
152        void finish();
153
154    private:
155        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
156        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
157        // that float is used extensively on the Java side for this. This is a typedef so that
158        // we can easily change it based on performance/accuracy tradeoff.
159        typedef double ParaWidth;
160
161        // A single candidate break
162        struct Candidate {
163            size_t offset;  // offset to text buffer, in code units
164            size_t prev;  // index to previous break
165            ParaWidth preBreak;
166            ParaWidth postBreak;
167            float penalty;  // penalty of this break (for example, hyphen penalty)
168            float score;  // best score found for this break
169        };
170
171        float currentLineWidth() const;
172
173        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty);
174
175        void addCandidate(Candidate cand);
176
177        void computeBreaksGreedy();
178
179        void computeBreaksOpt();
180
181        icu::BreakIterator* mBreakIterator = nullptr;
182        UText mUText = UTEXT_INITIALIZER;
183        std::vector<uint16_t>mTextBuf;
184        std::vector<float>mCharWidths;
185
186        // layout parameters
187        BreakStrategy mStrategy = kBreakStrategy_Greedy;
188        LineWidths mLineWidths;
189        TabStops mTabStops;
190
191        // result of line breaking
192        std::vector<int> mBreaks;
193        std::vector<float> mWidths;
194        std::vector<uint8_t> mFlags;
195
196        ParaWidth mWidth = 0;
197        std::vector<Candidate> mCandidates;
198
199        // the following are state for greedy breaker (updated while adding style runs)
200        size_t mLastBreak;
201        size_t mBestBreak;
202        float mBestScore;
203        ParaWidth mPreBreak;  // prebreak of last break
204        int mFirstTabIndex;
205};
206
207}  // namespace android
208
209#endif  // MINIKIN_LINE_BREAKER_H
210