WordBreaker.cpp revision 57b6dae9894b9362ef04517ff477fd491f9d433b
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "Minikin" 18#include <cutils/log.h> 19 20#include "minikin/WordBreaker.h" 21 22#include <unicode/uchar.h> 23#include <unicode/utf16.h> 24 25namespace android { 26 27const uint32_t CHAR_SOFT_HYPHEN = 0x00AD; 28 29void WordBreaker::setLocale(const icu::Locale& locale) { 30 UErrorCode status = U_ZERO_ERROR; 31 mBreakIterator.reset(icu::BreakIterator::createLineInstance(locale, status)); 32 // TODO: handle failure status 33 if (mText != nullptr) { 34 mBreakIterator->setText(&mUText, status); 35 } 36 mIteratorWasReset = true; 37} 38 39void WordBreaker::setText(const uint16_t* data, size_t size) { 40 mText = data; 41 mTextSize = size; 42 mIteratorWasReset = false; 43 mLast = 0; 44 mCurrent = 0; 45 UErrorCode status = U_ZERO_ERROR; 46 utext_openUChars(&mUText, data, size, &status); 47 mBreakIterator->setText(&mUText, status); 48 mBreakIterator->first(); 49} 50 51ssize_t WordBreaker::current() const { 52 return mCurrent; 53} 54 55ssize_t WordBreaker::next() { 56 int32_t result; 57 mLast = mCurrent; 58 do { 59 if (mIteratorWasReset) { 60 result = mBreakIterator->following(mCurrent); 61 mIteratorWasReset = false; 62 } else { 63 result = mBreakIterator->next(); 64 } 65 } while (result != icu::BreakIterator::DONE && (size_t)result != mTextSize 66 && mText[result - 1] == CHAR_SOFT_HYPHEN); 67 mCurrent = (ssize_t)result; 68 return mCurrent; 69} 70 71ssize_t WordBreaker::wordStart() const { 72 ssize_t result = mLast; 73 while (result < mCurrent) { 74 UChar32 c; 75 ssize_t ix = result; 76 U16_NEXT(mText, ix, mCurrent, c); 77 int32_t lb = u_getIntPropertyValue(c, UCHAR_LINE_BREAK); 78 // strip leading punctuation, defined as OP and QU line breaking classes, 79 // see UAX #14 80 if (!(lb == U_LB_OPEN_PUNCTUATION || lb == U_LB_QUOTATION)) { 81 break; 82 } 83 result = ix; 84 } 85 return result; 86} 87 88ssize_t WordBreaker::wordEnd() const { 89 ssize_t result = mCurrent; 90 while (result > mLast) { 91 UChar32 c; 92 ssize_t ix = result; 93 U16_PREV(mText, mLast, ix, c); 94 int32_t gc_mask = U_GET_GC_MASK(c); 95 // strip trailing space and punctuation 96 if ((gc_mask & (U_GC_ZS_MASK | U_GC_P_MASK)) == 0) { 97 break; 98 } 99 result = ix; 100 } 101 return result; 102} 103 104void WordBreaker::finish() { 105 mText = nullptr; 106 // Note: calling utext_close multiply is safe 107 utext_close(&mUText); 108} 109 110} // namespace android 111