LineBreaker.h revision 01f526614431e3a0a6e1a48039e00b8a9b7d6fbf
101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien/*
201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * Copyright (C) 2015 The Android Open Source Project
301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien *
401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * Licensed under the Apache License, Version 2.0 (the "License");
501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * you may not use this file except in compliance with the License.
601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * You may obtain a copy of the License at
701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien *
801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien *      http://www.apache.org/licenses/LICENSE-2.0
901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien *
1001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * Unless required by applicable law or agreed to in writing, software
1101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * distributed under the License is distributed on an "AS IS" BASIS,
1201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * See the License for the specific language governing permissions and
1401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * limitations under the License.
1501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien */
1601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
1701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien/**
1801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * A module for breaking paragraphs into lines, supporting high quality
1901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * hyphenation and justification.
2001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien */
2101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
2201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#ifndef MINIKIN_LINE_BREAKER_H
2301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#define MINIKIN_LINE_BREAKER_H
2401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
2501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#include "unicode/brkiter.h"
2601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#include "unicode/locid.h"
2701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#include <cmath>
2801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#include <vector>
2901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
3001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Leviennamespace android {
3101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
3201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levienenum BreakStrategy {
3301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    kBreakStrategy_Greedy = 0,
3401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    kBreakStrategy_HighQuality = 1,
3501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    kBreakStrategy_Balanced = 2
3601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien};
3701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
3801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien// TODO: want to generalize to be able to handle array of line widths
3901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levienclass LineWidths {
4001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    public:
4101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
4201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            mFirstWidth = firstWidth;
4301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            mFirstWidthLineCount = firstWidthLineCount;
4401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            mRestWidth = restWidth;
4501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
4601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        float getLineWidth(int line) const {
4701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            return (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
4801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
4901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    private:
5001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        float mFirstWidth;
5101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        int mFirstWidthLineCount;
5201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        float mRestWidth;
5301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien};
5401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
5501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levienclass TabStops {
5601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    public:
5701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void set(const int* stops, size_t nStops, int tabWidth) {
5801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            if (stops != nullptr) {
5901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien                mStops.assign(stops, stops + nStops);
6001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            } else {
6101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien                mStops.clear();
6201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            }
6301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            mTabWidth = tabWidth;
6401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
6501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        float nextTab(float widthSoFar) const {
6601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            for (size_t i = 0; i < mStops.size(); i++) {
6701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien                if (mStops[i] > widthSoFar) {
6801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien                    return mStops[i];
6901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien                }
7001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            }
7101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
7201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
7301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    private:
7401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        std::vector<int> mStops;
7501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        int mTabWidth;
7601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien};
7701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
7801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levienclass LineBreaker {
7901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    public:
8001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        ~LineBreaker() {
8101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            utext_close(&mUText);
8201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            delete mBreakIterator;
8301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
8401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
8501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
8601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
8701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // be set on the first invocation, but callers are encouraged not to call again unless
8801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // locale has actually changed.
8901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // That logic could be here but it's better for performance that it's upstream because of
9001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // the cost of constructing and comparing the ICU Locale object.
9101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void setLocale(const icu::Locale& locale) {
9201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            delete mBreakIterator;
9301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            UErrorCode status = U_ZERO_ERROR;
9401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
9501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            // TODO: check status
9601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            // TODO: load hyphenator from locale
9701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
9801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
9901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void resize(size_t size) {
10001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            mTextBuf.resize(size);
10101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            mCharWidths.resize(size);
10201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
10301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
10401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        size_t size() const {
10501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            return mTextBuf.size();
10601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
10701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
10801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        uint16_t* buffer() {
10901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            return mTextBuf.data();
11001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
11101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
11201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        float* charWidths() {
11301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            return mCharWidths.data();
11401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
11501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
11601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // set text to current contents of buffer
11701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void setText();
11801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
11901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
12001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
12101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
12201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            mTabStops.set(stops, nStops, tabWidth);
12301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
12401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
12501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        BreakStrategy getStrategy() const { return mStrategy; }
12601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
12701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
12801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
12901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // TODO: this class is actually fairly close to being general and not tied to using
13001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // Minikin to do the shaping of the strings. The main thing that would need to be changed
13101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // is having some kind of callback (or virtual class, or maybe even template), which could
13201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // easily be instantiated with Minikin's Layout. Future work for when needed.
13301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        float addStyleRun(const MinikinPaint* paint, const FontCollection* typeface,
13401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien                FontStyle style, size_t start, size_t end, bool isRtl);
13501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
13601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void addReplacement(size_t start, size_t end, float width);
13701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
13801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        size_t computeBreaks();
13901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
14001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        const int* getBreaks() const {
14101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            return mBreaks.data();
14201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
14301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
14401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        const float* getWidths() const {
14501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            return mWidths.data();
14601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
14701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
14801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        const uint8_t* getFlags() const {
14901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            return mFlags.data();
15001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        }
15101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
15201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void finish();
15301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
15401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien    private:
15501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
15601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
15701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // that float is used extensively on the Java side for this. This is a typedef so that
15801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // we can easily change it based on performance/accuracy tradeoff.
15901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        typedef double ParaWidth;
16001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
16101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // A single candidate break
16201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        struct Candidate {
16301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            size_t offset;  // offset to text buffer, in code units
16401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            size_t prev;  // index to previous break
16501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            ParaWidth preBreak;
16601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            ParaWidth postBreak;
16701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            float penalty;  // penalty of this break (for example, hyphen penalty)
16801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien            float score;  // best score found for this break
16901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        };
17001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
17101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        float currentLineWidth() const;
17201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
17301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty);
17401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
17501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void addCandidate(Candidate cand);
17601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
17701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void computeBreaksGreedy();
17801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
17901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        void computeBreaksOpt();
18001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
18101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        icu::BreakIterator* mBreakIterator = nullptr;
18201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        UText mUText = UTEXT_INITIALIZER;
18301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        std::vector<uint16_t>mTextBuf;
18401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        std::vector<float>mCharWidths;
18501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
18601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // layout parameters
18701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        BreakStrategy mStrategy = kBreakStrategy_Greedy;
18801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        LineWidths mLineWidths;
18901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        TabStops mTabStops;
19001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
19101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // result of line breaking
19201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        std::vector<int> mBreaks;
19301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        std::vector<float> mWidths;
19401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        std::vector<uint8_t> mFlags;
19501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
19601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        ParaWidth mWidth = 0;
19701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        std::vector<Candidate> mCandidates;
19801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
19901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        // the following are state for greedy breaker (updated while adding style runs)
20001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        size_t mLastBreak;
20101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        size_t mBestBreak;
20201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        float mBestScore;
20301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        ParaWidth mPreBreak;  // prebreak of last break
20401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien        int mFirstTabIndex;
20501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien};
20601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
20701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien}  // namespace android
20801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien
20901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#endif  // MINIKIN_LINE_BREAKER_H
210