LineBreaker.h revision 01f526614431e3a0a6e1a48039e00b8a9b7d6fbf
101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien/* 201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * Copyright (C) 2015 The Android Open Source Project 301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * 401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * Licensed under the Apache License, Version 2.0 (the "License"); 501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * you may not use this file except in compliance with the License. 601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * You may obtain a copy of the License at 701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * 801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * http://www.apache.org/licenses/LICENSE-2.0 901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * 1001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * Unless required by applicable law or agreed to in writing, software 1101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * distributed under the License is distributed on an "AS IS" BASIS, 1201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * See the License for the specific language governing permissions and 1401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * limitations under the License. 1501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien */ 1601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 1701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien/** 1801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * A module for breaking paragraphs into lines, supporting high quality 1901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien * hyphenation and justification. 2001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien */ 2101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 2201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#ifndef MINIKIN_LINE_BREAKER_H 2301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#define MINIKIN_LINE_BREAKER_H 2401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 2501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#include "unicode/brkiter.h" 2601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#include "unicode/locid.h" 2701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#include <cmath> 2801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#include <vector> 2901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 3001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Leviennamespace android { 3101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 3201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levienenum BreakStrategy { 3301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien kBreakStrategy_Greedy = 0, 3401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien kBreakStrategy_HighQuality = 1, 3501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien kBreakStrategy_Balanced = 2 3601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien}; 3701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 3801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien// TODO: want to generalize to be able to handle array of line widths 3901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levienclass LineWidths { 4001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien public: 4101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 4201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mFirstWidth = firstWidth; 4301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mFirstWidthLineCount = firstWidthLineCount; 4401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mRestWidth = restWidth; 4501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 4601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float getLineWidth(int line) const { 4701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth; 4801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 4901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien private: 5001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float mFirstWidth; 5101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien int mFirstWidthLineCount; 5201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float mRestWidth; 5301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien}; 5401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 5501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levienclass TabStops { 5601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien public: 5701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void set(const int* stops, size_t nStops, int tabWidth) { 5801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien if (stops != nullptr) { 5901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mStops.assign(stops, stops + nStops); 6001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } else { 6101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mStops.clear(); 6201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 6301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mTabWidth = tabWidth; 6401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 6501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float nextTab(float widthSoFar) const { 6601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien for (size_t i = 0; i < mStops.size(); i++) { 6701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien if (mStops[i] > widthSoFar) { 6801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return mStops[i]; 6901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 7001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 7101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return floor(widthSoFar / mTabWidth + 1) * mTabWidth; 7201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 7301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien private: 7401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien std::vector<int> mStops; 7501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien int mTabWidth; 7601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien}; 7701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 7801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levienclass LineBreaker { 7901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien public: 8001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien ~LineBreaker() { 8101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien utext_close(&mUText); 8201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien delete mBreakIterator; 8301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 8401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 8501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // Note: Locale persists across multiple invocations (it is not cleaned up by finish()), 8601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always 8701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // be set on the first invocation, but callers are encouraged not to call again unless 8801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // locale has actually changed. 8901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // That logic could be here but it's better for performance that it's upstream because of 9001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // the cost of constructing and comparing the ICU Locale object. 9101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void setLocale(const icu::Locale& locale) { 9201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien delete mBreakIterator; 9301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien UErrorCode status = U_ZERO_ERROR; 9401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mBreakIterator = icu::BreakIterator::createLineInstance(locale, status); 9501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // TODO: check status 9601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // TODO: load hyphenator from locale 9701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 9801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 9901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void resize(size_t size) { 10001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mTextBuf.resize(size); 10101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mCharWidths.resize(size); 10201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 10301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 10401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien size_t size() const { 10501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return mTextBuf.size(); 10601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 10701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 10801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien uint16_t* buffer() { 10901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return mTextBuf.data(); 11001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 11101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 11201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float* charWidths() { 11301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return mCharWidths.data(); 11401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 11501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 11601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // set text to current contents of buffer 11701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void setText(); 11801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 11901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth); 12001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 12101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void setTabStops(const int* stops, size_t nStops, int tabWidth) { 12201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien mTabStops.set(stops, nStops, tabWidth); 12301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 12401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 12501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien BreakStrategy getStrategy() const { return mStrategy; } 12601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 12701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void setStrategy(BreakStrategy strategy) { mStrategy = strategy; } 12801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 12901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // TODO: this class is actually fairly close to being general and not tied to using 13001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // Minikin to do the shaping of the strings. The main thing that would need to be changed 13101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // is having some kind of callback (or virtual class, or maybe even template), which could 13201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // easily be instantiated with Minikin's Layout. Future work for when needed. 13301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float addStyleRun(const MinikinPaint* paint, const FontCollection* typeface, 13401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien FontStyle style, size_t start, size_t end, bool isRtl); 13501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 13601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void addReplacement(size_t start, size_t end, float width); 13701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 13801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien size_t computeBreaks(); 13901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 14001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien const int* getBreaks() const { 14101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return mBreaks.data(); 14201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 14301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 14401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien const float* getWidths() const { 14501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return mWidths.data(); 14601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 14701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 14801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien const uint8_t* getFlags() const { 14901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien return mFlags.data(); 15001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien } 15101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 15201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void finish(); 15301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 15401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien private: 15501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for 15601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // very large paragraphs, accuracy could degrade using only 32-bit float. Note however 15701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // that float is used extensively on the Java side for this. This is a typedef so that 15801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // we can easily change it based on performance/accuracy tradeoff. 15901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien typedef double ParaWidth; 16001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 16101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // A single candidate break 16201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien struct Candidate { 16301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien size_t offset; // offset to text buffer, in code units 16401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien size_t prev; // index to previous break 16501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien ParaWidth preBreak; 16601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien ParaWidth postBreak; 16701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float penalty; // penalty of this break (for example, hyphen penalty) 16801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float score; // best score found for this break 16901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien }; 17001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 17101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float currentLineWidth() const; 17201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 17301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty); 17401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 17501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void addCandidate(Candidate cand); 17601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 17701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void computeBreaksGreedy(); 17801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 17901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien void computeBreaksOpt(); 18001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 18101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien icu::BreakIterator* mBreakIterator = nullptr; 18201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien UText mUText = UTEXT_INITIALIZER; 18301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien std::vector<uint16_t>mTextBuf; 18401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien std::vector<float>mCharWidths; 18501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 18601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // layout parameters 18701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien BreakStrategy mStrategy = kBreakStrategy_Greedy; 18801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien LineWidths mLineWidths; 18901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien TabStops mTabStops; 19001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 19101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // result of line breaking 19201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien std::vector<int> mBreaks; 19301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien std::vector<float> mWidths; 19401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien std::vector<uint8_t> mFlags; 19501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 19601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien ParaWidth mWidth = 0; 19701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien std::vector<Candidate> mCandidates; 19801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 19901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien // the following are state for greedy breaker (updated while adding style runs) 20001f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien size_t mLastBreak; 20101f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien size_t mBestBreak; 20201f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien float mBestScore; 20301f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien ParaWidth mPreBreak; // prebreak of last break 20401f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien int mFirstTabIndex; 20501f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien}; 20601f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 20701f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien} // namespace android 20801f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien 20901f526614431e3a0a6e1a48039e00b8a9b7d6fbfRaph Levien#endif // MINIKIN_LINE_BREAKER_H 210