// dictionary_utils.cpp revision 88bc312ad34321fb3e81be2dc939a889d065f4a7
1/* 2 * Copyright (C) 2014, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/core/dictionary/dictionary_utils.h" 18 19#include "dictionary/interface/dictionary_structure_with_buffer_policy.h" 20#include "dictionary/property/ngram_context.h" 21#include "suggest/core/dicnode/dic_node.h" 22#include "suggest/core/dicnode/dic_node_priority_queue.h" 23#include "suggest/core/dicnode/dic_node_vector.h" 24#include "suggest/core/dictionary/dictionary.h" 25#include "suggest/core/dictionary/digraph_utils.h" 26#include "utils/int_array_view.h" 27 28namespace latinime { 29 30/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches( 31 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 32 const CodePointArrayView codePoints) { 33 std::vector<DicNode> current; 34 std::vector<DicNode> next; 35 36 // No ngram context. 37 NgramContext emptyNgramContext; 38 WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; 39 const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds( 40 dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */); 41 current.emplace_back(); 42 DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, ¤t.front()); 43 for (const int codePoint : codePoints) { 44 // The base-lower input is used to ignore case errors and accent errors. 
45 const int baseLowerCodePoint = CharUtils::toBaseLowerCase(codePoint); 46 for (const DicNode &dicNode : current) { 47 if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == baseLowerCodePoint) { 48 next.emplace_back(dicNode); 49 next.back().advanceDigraphIndex(); 50 continue; 51 } 52 processChildDicNodes(dictionaryStructurePolicy, baseLowerCodePoint, &dicNode, &next); 53 } 54 current.clear(); 55 current.swap(next); 56 } 57 58 int maxProbability = NOT_A_PROBABILITY; 59 for (const DicNode &dicNode : current) { 60 if (!dicNode.isTerminalDicNode()) { 61 continue; 62 } 63 const WordAttributes wordAttributes = 64 dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(), 65 dicNode.getWordId(), nullptr /* multiBigramMap */); 66 // dicNode can contain case errors, accent errors, intentional omissions or digraphs. 67 maxProbability = std::max(maxProbability, wordAttributes.getProbability()); 68 } 69 return maxProbability; 70} 71 72/* static */ void DictionaryUtils::processChildDicNodes( 73 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 74 const int inputCodePoint, const DicNode *const parentDicNode, 75 std::vector<DicNode> *const outDicNodes) { 76 DicNodeVector childDicNodes; 77 DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes); 78 for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) { 79 DicNode *const childDicNode = childDicNodes[childIndex]; 80 const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint()); 81 if (inputCodePoint == codePoint) { 82 outDicNodes->emplace_back(*childDicNode); 83 } 84 if (childDicNode->canBeIntentionalOmission()) { 85 processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode, 86 outDicNodes); 87 } 88 if (DigraphUtils::hasDigraphForCodePoint( 89 dictionaryStructurePolicy->getHeaderStructurePolicy(), 90 childDicNode->getNodeCodePoint())) { 91 childDicNode->advanceDigraphIndex(); 
92 if (childDicNode->getNodeCodePoint() == codePoint) { 93 childDicNode->advanceDigraphIndex(); 94 outDicNodes->emplace_back(*childDicNode); 95 } 96 } 97 } 98} 99 100} // namespace latinime 101