Hyphenator.h revision 5cdad92c300a65cab89b172e952186f0c5870657
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * An implementation of Liang's hyphenation algorithm.
19 */
20
#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>
23
24#ifndef MINIKIN_HYPHENATOR_H
25#define MINIKIN_HYPHENATOR_H
26
27namespace android {
28
29class Trie {
30public:
31    std::vector<uint8_t> result;
32    std::unordered_map<uint16_t, Trie> succ;
33};
34
35class Hyphenator {
36public:
37    // Note: this will also require a locale, for proper case folding behavior
38    static Hyphenator* load(const uint16_t* patternData, size_t size);
39
40    // Compute the hyphenation of a word, storing the hyphenation in result vector. Each
41    // entry in the vector is a "hyphen edit" to be applied at the corresponding code unit
42    // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen and break,
43    // but this will be expanded to other edits for nonstandard hyphenation.
44    // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-phen".
45    void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
46
47private:
48    void addPattern(const uint16_t* pattern, size_t size);
49
50    void hyphenateSoft(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
51
52    // TODO: these should become parameters, as they might vary by locale, screen size, and
53    // possibly explicit user control.
54    static const int MIN_PREFIX = 2;
55    static const int MIN_SUFFIX = 3;
56
57    Trie root;
58};
59
60}  // namespace android
61
62#endif   // MINIKIN_HYPHENATOR_H