WordBreaker.h revision 57b6dae9894b9362ef04517ff477fd491f9d433b
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
/**
 * A wrapper around ICU's line break iterator that provides customized line
 * break opportunities and also identifies words for the purpose of
 * hyphenation.
 */
22
23#ifndef MINIKIN_WORD_BREAKER_H
24#define MINIKIN_WORD_BREAKER_H
25
26#include "unicode/brkiter.h"
27#include <memory>
28
29namespace android {
30
31class WordBreaker {
32public:
33    ~WordBreaker() {
34        finish();
35    }
36
37    void setLocale(const icu::Locale& locale);
38
39    void setText(const uint16_t* data, size_t size);
40
41    // Advance iterator to next word break. Return offset, or -1 if EOT
42    ssize_t next();
43
44    // Current offset of iterator, equal to 0 at BOT or last return from next()
45    ssize_t current() const;
46
47    // After calling next(), wordStart() and wordEnd() are offsets defining the previous
48    // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation.
49    ssize_t wordStart() const;
50
51    ssize_t wordEnd() const;
52
53    void finish();
54
55private:
56    std::unique_ptr<icu::BreakIterator> mBreakIterator;
57    UText mUText = UTEXT_INITIALIZER;
58    const uint16_t* mText = nullptr;
59    size_t mTextSize;
60    ssize_t mLast;
61    ssize_t mCurrent;
62    bool mIteratorWasReset;
63};
64
65}  // namespace
66
67#endif  // MINIKIN_WORD_BREAKER_H
68