1cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi/* 2cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project 3cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * 4cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * you may not use this file except in compliance with the License. 6cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * You may obtain a copy of the License at 7cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * 8cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * 10cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * See the License for the specific language governing permissions and 14cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi * limitations under the License. 15cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi */ 16cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi 17cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" 18cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi 19cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h" 20cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi 21cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanaginamespace latinime { 22cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanaginamespace dicttoolkit { 23cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi 24cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagibool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) { 25cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi const CodePointArrayView codePoints = wordProperty.getCodePoints(); 26cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) { 27cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return false; 28cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 29cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return addWordInner(codePoints, wordProperty, mRootPtNodeArray); 30cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi} 31cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi 32cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagibool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints, 33cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) { 34cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi auto ptNodeList = ptNodeArray.getMutablePtNodeList(); 35cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi auto ptNodeIt = ptNodeList->begin(); 36cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) { 37cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi const auto &ptNode = *ptNodeIt; 38cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); 39cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints[0] < ptNodeCodePoints[0]) { 40cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi continue; 41cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 42cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints[0] > ptNodeCodePoints[0]) { 43cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi break; 44cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 45cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi size_t i = 1; 46cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi for (; i < codePoints.size(); ++i) { 47cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (i >= ptNodeCodePoints.size()) { 48cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi // Add new child. 49cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return addWordInner(codePoints.skip(i), wordProperty, 50cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ptNode->getChildrenPtNodeArray()); 51cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 52cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints[i] != ptNodeCodePoints[i]) { 53cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi break; 54cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 55cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 56cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) { 57cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi // All code points matched. 58cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (ptNode->getWordProperty()) { 59cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi // Adding the same word multiple times is not supported. 60cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return false; 61cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 62cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ptNodeList->insert(ptNodeIt, 63cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode)); 64cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ptNodeList->erase(ptNodeIt); 65cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return true; 66cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 67cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi // The (i+1)-th elements are different. 68cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi // Create and Add new parent ptNode for the common part. 69cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi auto newPtNode = codePoints.size() == i 70cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty) 71cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i)); 72cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ptNodeList->insert(ptNodeIt, newPtNode); 73cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray = 74cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi newPtNode->getChildrenPtNodeArray(); 75cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi // Add new child for the existing ptNode. 76cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi childrenPtNodeArray.getMutablePtNodeList()->push_back( 77cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi std::make_shared<OffdeviceIntermediateDictPtNode>( 78cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ptNodeCodePoints.skip(i), *ptNode)); 79cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ptNodeList->erase(ptNodeIt); 80cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints.size() != i) { 81cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi // Add a child for the new word. 82cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray); 83cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 84cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return true; 85cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 86cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ptNodeList->insert(ptNodeIt, 87cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)); 88cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return true; 89cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi} 90cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi 91cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagiconst WordProperty *OffdeviceIntermediateDict::getWordProperty( 92cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi const CodePointArrayView codePoints) const { 93cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray; 94cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi for (size_t i = 0; i < codePoints.size();) { 95cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi bool foundNext = false; 96cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi for (const auto ptNode : ptNodeArray->getPtNodeList()) { 97cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); 98cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints[i] < ptNodeCodePoints[0]) { 99cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi continue; 100cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 101cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints[i] > ptNodeCodePoints[0] 102cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi || codePoints.size() < ptNodeCodePoints.size()) { 103cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return nullptr; 104cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 105cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) { 106cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (codePoints[i + j] != ptNodeCodePoints[j]) { 107cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return nullptr; 108cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 109cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 110cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi i += ptNodeCodePoints.size(); 111cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (i == codePoints.size()) { 112cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return ptNode->getWordProperty(); 113cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 114cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi ptNodeArray = &ptNode->getChildrenPtNodeArray(); 115cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi foundNext = true; 116cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi break; 117cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 118cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi if (!foundNext) { 119cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi break; 120cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 121cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi } 122cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi return nullptr; 123cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi} 124cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi 125cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi} // namespace dicttoolkit 126cd105409739db85f5f325e7cca322dadb12cded7Keisuke Kuroyanagi} // namespace latinime 127