1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/** 18 * A wrapper around ICU's line break iterator, that gives customized line 19 * break opportunities, as well as identifying words for the purpose of 20 * hyphenation. 21 */ 22 23#ifndef MINIKIN_WORD_BREAKER_H 24#define MINIKIN_WORD_BREAKER_H 25 26#include "unicode/brkiter.h" 27#include <memory> 28 29namespace android { 30 31class WordBreaker { 32public: 33 ~WordBreaker() { 34 finish(); 35 } 36 37 void setLocale(const icu::Locale& locale); 38 39 void setText(const uint16_t* data, size_t size); 40 41 // Advance iterator to next word break. Return offset, or -1 if EOT 42 ssize_t next(); 43 44 // Current offset of iterator, equal to 0 at BOT or last return from next() 45 ssize_t current() const; 46 47 // After calling next(), wordStart() and wordEnd() are offsets defining the previous 48 // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. 49 ssize_t wordStart() const; 50 51 ssize_t wordEnd() const; 52 53 int breakBadness() const; 54 55 void finish(); 56 57private: 58 std::unique_ptr<icu::BreakIterator> mBreakIterator; 59 UText mUText = UTEXT_INITIALIZER; 60 const uint16_t* mText = nullptr; 61 size_t mTextSize; 62 ssize_t mLast; 63 ssize_t mCurrent; 64 bool mIteratorWasReset; 65 66 // state for the email address / url detector 67 ssize_t mScanOffset; 68 bool mInEmailOrUrl; 69}; 70 71} // namespace 72 73#endif // MINIKIN_WORD_BREAKER_H 74