dic_node_utils.cpp revision 7fd9667d76cdc6febe32545865648ea90dc28904
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/core/dicnode/dic_node_utils.h" 18 19#include <cstring> 20 21#include "suggest/core/dicnode/dic_node.h" 22#include "suggest/core/dicnode/dic_node_vector.h" 23#include "suggest/core/dictionary/multi_bigram_map.h" 24#include "suggest/core/dictionary/probability_utils.h" 25#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 26#include "utils/char_utils.h" 27 28namespace latinime { 29 30/////////////////////////////// 31// Node initialization utils // 32/////////////////////////////// 33 34/* static */ void DicNodeUtils::initAsRoot( 35 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 36 const int prevWordNodePos, DicNode *const newRootNode) { 37 newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); 38} 39 40/*static */ void DicNodeUtils::initAsRootWithPreviousWord( 41 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 42 DicNode *const prevWordLastNode, DicNode *const newRootNode) { 43 newRootNode->initAsRootWithPreviousWord( 44 prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); 45} 46 47/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { 48 destNode->initByCopy(srcNode); 49} 50 51/////////////////////////////////// 52// Traverse node expansion utils // 53/////////////////////////////////// 54/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, 55 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 56 DicNodeVector *childDicNodes) { 57 if (dicNode->isTotalInputSizeExceedingLimit()) { 58 return; 59 } 60 if (!dicNode->isLeavingNode()) { 61 childDicNodes->pushPassingChild(dicNode); 62 } else { 63 dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes); 64 } 65} 66 67/////////////////// 68// Scoring utils // 69/////////////////// 70/** 71 * Computes the combined bigram / unigram cost for the given dicNode. 72 */ 73/* static */ float DicNodeUtils::getBigramNodeImprobability( 74 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 75 const DicNode *const node, MultiBigramMap *multiBigramMap) { 76 if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { 77 return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); 78 } 79 const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, 80 multiBigramMap); 81 // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. 82 const float cost = static_cast<float>(MAX_PROBABILITY - probability) 83 / static_cast<float>(MAX_PROBABILITY); 84 return cost; 85} 86 87/* static */ int DicNodeUtils::getBigramNodeProbability( 88 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 89 const DicNode *const node, MultiBigramMap *multiBigramMap) { 90 const int unigramProbability = node->getProbability(); 91 const int wordPos = node->getPos(); 92 const int prevWordPos = node->getPrevWordPos(); 93 if (NOT_A_VALID_WORD_POS == wordPos || NOT_A_VALID_WORD_POS == prevWordPos) { 94 // Note: Normally wordPos comes from the dictionary and should never equal 95 // NOT_A_VALID_WORD_POS. 96 return ProbabilityUtils::backoff(unigramProbability); 97 } 98 if (multiBigramMap) { 99 return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, 100 wordPos, unigramProbability); 101 } 102 return ProbabilityUtils::backoff(unigramProbability); 103} 104 105//////////////// 106// Char utils // 107//////////////// 108 109// TODO: Move to char_utils? 110/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0, 111 const int *const src1, const int16_t length1, int *dest) { 112 int actualLength0 = 0; 113 for (int i = 0; i < length0; ++i) { 114 if (src0[i] == 0) { 115 break; 116 } 117 actualLength0 = i + 1; 118 } 119 actualLength0 = min(actualLength0, MAX_WORD_LENGTH); 120 memcpy(dest, src0, actualLength0 * sizeof(dest[0])); 121 if (!src1 || length1 == 0) { 122 return actualLength0; 123 } 124 int actualLength1 = 0; 125 for (int i = 0; i < length1; ++i) { 126 if (src1[i] == 0) { 127 break; 128 } 129 actualLength1 = i + 1; 130 } 131 actualLength1 = min(actualLength1, MAX_WORD_LENGTH - actualLength0); 132 memcpy(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0])); 133 return actualLength0 + actualLength1; 134} 135} // namespace latinime 136