dic_node_utils.cpp revision cf612a3abfd0cc244f8449db2cf11a0a7c680a2f
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/core/dicnode/dic_node_utils.h" 18 19#include <algorithm> 20#include <cstring> 21 22#include "suggest/core/dicnode/dic_node.h" 23#include "suggest/core/dicnode/dic_node_vector.h" 24#include "suggest/core/dictionary/multi_bigram_map.h" 25#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 26 27namespace latinime { 28 29/////////////////////////////// 30// Node initialization utils // 31/////////////////////////////// 32 33/* static */ void DicNodeUtils::initAsRoot( 34 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 35 const int prevWordPtNodePos, DicNode *const newRootDicNode) { 36 newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos); 37} 38 39/*static */ void DicNodeUtils::initAsRootWithPreviousWord( 40 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 41 const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) { 42 newRootDicNode->initAsRootWithPreviousWord( 43 prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition()); 44} 45 46/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode, 47 DicNode *const destDicNode) { 48 destDicNode->initByCopy(srcDicNode); 49} 50 51/////////////////////////////////// 52// Traverse node expansion utils // 53/////////////////////////////////// 54/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, 55 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 56 DicNodeVector *const childDicNodes) { 57 if (dicNode->isTotalInputSizeExceedingLimit()) { 58 return; 59 } 60 if (!dicNode->isLeavingNode()) { 61 childDicNodes->pushPassingChild(dicNode); 62 } else { 63 dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes); 64 } 65} 66 67/////////////////// 68// Scoring utils // 69/////////////////// 70/** 71 * Computes the combined bigram / unigram cost for the given dicNode. 72 */ 73/* static */ float DicNodeUtils::getBigramNodeImprobability( 74 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 75 const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { 76 if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { 77 return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); 78 } 79 const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode, 80 multiBigramMap); 81 // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. 82 const float cost = static_cast<float>(MAX_PROBABILITY - probability) 83 / static_cast<float>(MAX_PROBABILITY); 84 return cost; 85} 86 87/* static */ int DicNodeUtils::getBigramNodeProbability( 88 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 89 const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { 90 const int unigramProbability = dicNode->getProbability(); 91 const int ptNodePos = dicNode->getPtNodePos(); 92 const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos(); 93 if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { 94 // Note: Normally wordPos comes from the dictionary and should never equal 95 // NOT_A_VALID_WORD_POS. 96 return dictionaryStructurePolicy->getProbability(unigramProbability, 97 NOT_A_PROBABILITY); 98 } 99 if (multiBigramMap) { 100 return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, 101 prevWordTerminalPtNodePos, ptNodePos, unigramProbability); 102 } 103 return dictionaryStructurePolicy->getProbability(unigramProbability, 104 NOT_A_PROBABILITY); 105} 106 107//////////////// 108// Char utils // 109//////////////// 110 111// TODO: Move to char_utils? 112/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0, 113 const int *const src1, const int16_t length1, int *const dest) { 114 int actualLength0 = 0; 115 for (int i = 0; i < length0; ++i) { 116 if (src0[i] == 0) { 117 break; 118 } 119 actualLength0 = i + 1; 120 } 121 actualLength0 = std::min(actualLength0, MAX_WORD_LENGTH); 122 memmove(dest, src0, actualLength0 * sizeof(dest[0])); 123 if (!src1 || length1 == 0) { 124 return actualLength0; 125 } 126 int actualLength1 = 0; 127 for (int i = 0; i < length1; ++i) { 128 if (src1[i] == 0) { 129 break; 130 } 131 actualLength1 = i + 1; 132 } 133 actualLength1 = std::min(actualLength1, MAX_WORD_LENGTH - actualLength0); 134 memmove(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0])); 135 return actualLength0 + actualLength1; 136} 137} // namespace latinime 138