dictionary_utils.cpp revision c32356c2291a6de8adf21698e6467f30b5f2e31c
1d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* 2d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Copyright (C) 2014, The Android Open Source Project 3d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * 4d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * you may not use this file except in compliance with the License. 6d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * You may obtain a copy of the License at 7d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * 8d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * 10d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * See the License for the specific language governing permissions and 14d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * limitations under the License. 15d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi */ 16d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 17d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/dictionary_utils.h" 18d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 19d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node.h" 20d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node_priority_queue.h" 21d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node_vector.h" 22d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/dictionary.h" 23d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/digraph_utils.h" 24d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/session/prev_words_info.h" 25d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 26d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 27d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanaginamespace latinime { 28d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 29d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches( 30d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 31d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const int *const codePoints, const int codePointCount) { 32d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi std::vector<DicNode> current; 33d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi std::vector<DicNode> next; 34d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 35d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi // No prev words information. 36d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi PrevWordsInfo emptyPrevWordsInfo; 3789a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 3889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi emptyPrevWordsInfo.getPrevWordIds(dictionaryStructurePolicy, prevWordIds, 3989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi false /* tryLowerCaseSearch */); 40d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi current.emplace_back(); 4189a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, ¤t.front()); 42d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi for (int i = 0; i < codePointCount; ++i) { 43d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi // The base-lower input is used to ignore case errors and accent errors. 44d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]); 45d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi for (const DicNode &dicNode : current) { 46d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == codePoint) { 47d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi next.emplace_back(dicNode); 48d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi next.back().advanceDigraphIndex(); 49d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi continue; 50d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 51d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi processChildDicNodes(dictionaryStructurePolicy, codePoint, &dicNode, &next); 52d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 53d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi current.clear(); 54d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi current.swap(next); 55d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 56d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 57c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi int maxProbability = NOT_A_PROBABILITY; 58d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi for (const DicNode &dicNode : current) { 59d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (!dicNode.isTerminalDicNode()) { 60d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi continue; 61d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 62c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi const WordAttributes wordAttributes = 63c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(), 64c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi dicNode.getWordId(), nullptr /* multiBigramMap */); 65d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi // dicNode can contain case errors, accent errors, intentional omissions or digraphs. 66c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi maxProbability = std::max(maxProbability, wordAttributes.getProbability()); 67d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 68c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi return maxProbability; 69d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi} 70d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 71d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ void DictionaryUtils::processChildDicNodes( 72d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 73d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const int inputCodePoint, const DicNode *const parentDicNode, 74d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi std::vector<DicNode> *const outDicNodes) { 75d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi DicNodeVector childDicNodes; 76d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes); 77d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) { 78d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi DicNode *const childDicNode = childDicNodes[childIndex]; 79d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint()); 80d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (inputCodePoint == codePoint) { 81d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi outDicNodes->emplace_back(*childDicNode); 82d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 83d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (childDicNode->canBeIntentionalOmission()) { 84d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode, 85d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi outDicNodes); 86d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 87d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (DigraphUtils::hasDigraphForCodePoint( 88d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi dictionaryStructurePolicy->getHeaderStructurePolicy(), 89d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi childDicNode->getNodeCodePoint())) { 90d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi childDicNode->advanceDigraphIndex(); 91d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (childDicNode->getNodeCodePoint() == codePoint) { 92d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi childDicNode->advanceDigraphIndex(); 93d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi outDicNodes->emplace_back(*childDicNode); 94d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 95d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 96d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 97d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi} 98d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 99d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi} // namespace latinime 100