1d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* 2d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Copyright (C) 2014, The Android Open Source Project 3d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * 4d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * you may not use this file except in compliance with the License. 6d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * You may obtain a copy of the License at 7d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * 8d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * 10d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * See the License for the specific language governing permissions and 14d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * limitations under the License. 15d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi */ 16d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 17d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/dictionary_utils.h" 18d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 1988bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/dictionary_structure_with_buffer_policy.h" 2088bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/ngram_context.h" 21d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node.h" 22d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node_priority_queue.h" 23d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node_vector.h" 24d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/dictionary.h" 25d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/digraph_utils.h" 26537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi#include "utils/int_array_view.h" 27d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 28d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanaginamespace latinime { 29d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 30d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches( 31d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 32fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi const CodePointArrayView codePoints) { 33d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi std::vector<DicNode> current; 34d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi std::vector<DicNode> next; 35d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 3672e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi // No ngram context. 3772e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi NgramContext emptyNgramContext; 38c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; 3972e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds( 40c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */); 41d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi current.emplace_back(); 42c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, ¤t.front()); 43fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi for (const int codePoint : codePoints) { 44d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi // The base-lower input is used to ignore case errors and accent errors. 45fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi const int baseLowerCodePoint = CharUtils::toBaseLowerCase(codePoint); 46d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi for (const DicNode &dicNode : current) { 47fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == baseLowerCodePoint) { 48d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi next.emplace_back(dicNode); 49d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi next.back().advanceDigraphIndex(); 50d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi continue; 51d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 52fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi processChildDicNodes(dictionaryStructurePolicy, baseLowerCodePoint, &dicNode, &next); 53d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 54d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi current.clear(); 55d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi current.swap(next); 56d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 57d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 58c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi int maxProbability = NOT_A_PROBABILITY; 59d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi for (const DicNode &dicNode : current) { 60d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (!dicNode.isTerminalDicNode()) { 61d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi continue; 62d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 63c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi const WordAttributes wordAttributes = 64c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(), 65c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi dicNode.getWordId(), nullptr /* multiBigramMap */); 66d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi // dicNode can contain case errors, accent errors, intentional omissions or digraphs. 67c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi maxProbability = std::max(maxProbability, wordAttributes.getProbability()); 68d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 69c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi return maxProbability; 70d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi} 71d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 72d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ void DictionaryUtils::processChildDicNodes( 73d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 74d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const int inputCodePoint, const DicNode *const parentDicNode, 75d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi std::vector<DicNode> *const outDicNodes) { 76d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi DicNodeVector childDicNodes; 77d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes); 78d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) { 79d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi DicNode *const childDicNode = childDicNodes[childIndex]; 80d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint()); 81d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (inputCodePoint == codePoint) { 82d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi outDicNodes->emplace_back(*childDicNode); 83d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 84d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (childDicNode->canBeIntentionalOmission()) { 85d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode, 86d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi outDicNodes); 87d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 88d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (DigraphUtils::hasDigraphForCodePoint( 89d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi dictionaryStructurePolicy->getHeaderStructurePolicy(), 90d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi childDicNode->getNodeCodePoint())) { 91d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi childDicNode->advanceDigraphIndex(); 92d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi if (childDicNode->getNodeCodePoint() == codePoint) { 93d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi childDicNode->advanceDigraphIndex(); 94d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi outDicNodes->emplace_back(*childDicNode); 95d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 96d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 97d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi } 98d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi} 99d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi 100d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi} // namespace latinime 101