dic_node_utils.cpp revision 7fd9667d76cdc6febe32545865648ea90dc28904
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/core/dicnode/dic_node_utils.h"
18
19#include <cstring>
20
21#include "suggest/core/dicnode/dic_node.h"
22#include "suggest/core/dicnode/dic_node_vector.h"
23#include "suggest/core/dictionary/multi_bigram_map.h"
24#include "suggest/core/dictionary/probability_utils.h"
25#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
26#include "utils/char_utils.h"
27
28namespace latinime {
29
30///////////////////////////////
31// Node initialization utils //
32///////////////////////////////
33
34/* static */ void DicNodeUtils::initAsRoot(
35        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
36        const int prevWordNodePos, DicNode *const newRootNode) {
37    newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
38}
39
40/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
41        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
42        DicNode *const prevWordLastNode, DicNode *const newRootNode) {
43    newRootNode->initAsRootWithPreviousWord(
44            prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
45}
46
47/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
48    destNode->initByCopy(srcNode);
49}
50
51///////////////////////////////////
52// Traverse node expansion utils //
53///////////////////////////////////
54/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
55        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
56        DicNodeVector *childDicNodes) {
57    if (dicNode->isTotalInputSizeExceedingLimit()) {
58        return;
59    }
60    if (!dicNode->isLeavingNode()) {
61        childDicNodes->pushPassingChild(dicNode);
62    } else {
63        dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
64    }
65}
66
67///////////////////
68// Scoring utils //
69///////////////////
70/**
71 * Computes the combined bigram / unigram cost for the given dicNode.
72 */
73/* static */ float DicNodeUtils::getBigramNodeImprobability(
74        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
75        const DicNode *const node, MultiBigramMap *multiBigramMap) {
76    if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
77        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
78    }
79    const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
80            multiBigramMap);
81    // TODO: This equation to calculate the improbability looks unreasonable.  Investigate this.
82    const float cost = static_cast<float>(MAX_PROBABILITY - probability)
83            / static_cast<float>(MAX_PROBABILITY);
84    return cost;
85}
86
87/* static */ int DicNodeUtils::getBigramNodeProbability(
88        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
89        const DicNode *const node, MultiBigramMap *multiBigramMap) {
90    const int unigramProbability = node->getProbability();
91    const int wordPos = node->getPos();
92    const int prevWordPos = node->getPrevWordPos();
93    if (NOT_A_VALID_WORD_POS == wordPos || NOT_A_VALID_WORD_POS == prevWordPos) {
94        // Note: Normally wordPos comes from the dictionary and should never equal
95        // NOT_A_VALID_WORD_POS.
96        return ProbabilityUtils::backoff(unigramProbability);
97    }
98    if (multiBigramMap) {
99        return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
100                wordPos, unigramProbability);
101    }
102    return ProbabilityUtils::backoff(unigramProbability);
103}
104
105////////////////
106// Char utils //
107////////////////
108
109// TODO: Move to char_utils?
110/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
111        const int *const src1, const int16_t length1, int *dest) {
112    int actualLength0 = 0;
113    for (int i = 0; i < length0; ++i) {
114        if (src0[i] == 0) {
115            break;
116        }
117        actualLength0 = i + 1;
118    }
119    actualLength0 = min(actualLength0, MAX_WORD_LENGTH);
120    memcpy(dest, src0, actualLength0 * sizeof(dest[0]));
121    if (!src1 || length1 == 0) {
122        return actualLength0;
123    }
124    int actualLength1 = 0;
125    for (int i = 0; i < length1; ++i) {
126        if (src1[i] == 0) {
127            break;
128        }
129        actualLength1 = i + 1;
130    }
131    actualLength1 = min(actualLength1, MAX_WORD_LENGTH - actualLength0);
132    memcpy(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0]));
133    return actualLength0 + actualLength1;
134}
135} // namespace latinime
136