1cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi/*
2cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project
3cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi *
4cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License");
5cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * you may not use this file except in compliance with the License.
6cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * You may obtain a copy of the License at
7cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi *
8cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi *      http://www.apache.org/licenses/LICENSE-2.0
9cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi *
10cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software
11cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS,
12cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * See the License for the specific language governing permissions and
14cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * limitations under the License.
15cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi */
16cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi
17cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
18cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi
19cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
20cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi
21cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanaginamespace latinime {
22cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanaginamespace dicttoolkit {
23cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi
24cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagibool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
25cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    const CodePointArrayView codePoints = wordProperty.getCodePoints();
26cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) {
27cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        return false;
28cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    }
29cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    return addWordInner(codePoints, wordProperty, mRootPtNodeArray);
30cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi}
31cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi
32cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagibool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
33cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
34cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    auto ptNodeList = ptNodeArray.getMutablePtNodeList();
35cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    auto ptNodeIt = ptNodeList->begin();
36cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) {
37cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        const auto &ptNode = *ptNodeIt;
38cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
39cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        if (codePoints[0] < ptNodeCodePoints[0]) {
40cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            continue;
41cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        }
42cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        if (codePoints[0] > ptNodeCodePoints[0]) {
43cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            break;
44cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        }
45cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        size_t i = 1;
46cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        for (; i < codePoints.size(); ++i) {
47cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            if (i >= ptNodeCodePoints.size()) {
48cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                // Add new child.
49cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                return addWordInner(codePoints.skip(i), wordProperty,
50cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                        ptNode->getChildrenPtNodeArray());
51cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            }
52cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            if (codePoints[i] != ptNodeCodePoints[i]) {
53cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                break;
54cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            }
55cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        }
56cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
57cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            // All code points matched.
58cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            if (ptNode->getWordProperty()) {
59cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                //  Adding the same word multiple times is not supported.
60cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                return false;
61cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            }
62cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            ptNodeList->insert(ptNodeIt,
63cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                    std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode));
64cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            ptNodeList->erase(ptNodeIt);
65cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            return true;
66cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        }
67cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        // The (i+1)-th elements are different.
68cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        // Create and Add new parent ptNode for the common part.
69cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        auto newPtNode = codePoints.size() == i
70cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)
71cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i));
72cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        ptNodeList->insert(ptNodeIt, newPtNode);
73cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
74cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                newPtNode->getChildrenPtNodeArray();
75cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        // Add new child for the existing ptNode.
76cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        childrenPtNodeArray.getMutablePtNodeList()->push_back(
77cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                std::make_shared<OffdeviceIntermediateDictPtNode>(
78cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                        ptNodeCodePoints.skip(i), *ptNode));
79cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        ptNodeList->erase(ptNodeIt);
80cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        if (codePoints.size() != i) {
81cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            // Add a child for the new word.
82cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
83cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        }
84cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        return true;
85cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    }
86cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    ptNodeList->insert(ptNodeIt,
87cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty));
88cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    return true;
89cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi}
90cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi
91cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagiconst WordProperty *OffdeviceIntermediateDict::getWordProperty(
92cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        const CodePointArrayView codePoints) const {
93cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
94cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    for (size_t i = 0; i < codePoints.size();) {
95cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        bool foundNext = false;
96cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        for (const auto ptNode : ptNodeArray->getPtNodeList()) {
97cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
98cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            if (codePoints[i] < ptNodeCodePoints[0]) {
99cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                continue;
100cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            }
101cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            if (codePoints[i] > ptNodeCodePoints[0]
102cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                     || codePoints.size() < ptNodeCodePoints.size()) {
103cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                return nullptr;
104cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            }
105cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
106cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                if (codePoints[i + j] != ptNodeCodePoints[j]) {
107cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                    return nullptr;
108cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                }
109cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            }
110cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            i += ptNodeCodePoints.size();
111cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            if (i == codePoints.size()) {
112cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi                return ptNode->getWordProperty();
113cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            }
114cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            ptNodeArray = &ptNode->getChildrenPtNodeArray();
115cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            foundNext = true;
116cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            break;
117cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        }
118cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        if (!foundNext) {
119cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi            break;
120cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi        }
121cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    }
122cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi    return nullptr;
123cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi}
124cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi
125cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi} // namespace dicttoolkit
126cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi} // namespace latinime
127