dic_node_utils.cpp revision cf612a3abfd0cc244f8449db2cf11a0a7c680a2f
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/core/dicnode/dic_node_utils.h"
18
19#include <algorithm>
20#include <cstring>
21
22#include "suggest/core/dicnode/dic_node.h"
23#include "suggest/core/dicnode/dic_node_vector.h"
24#include "suggest/core/dictionary/multi_bigram_map.h"
25#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
26
27namespace latinime {
28
///////////////////////////////
// Node initialization utils //
///////////////////////////////
32
33/* static */ void DicNodeUtils::initAsRoot(
34        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
35        const int prevWordPtNodePos, DicNode *const newRootDicNode) {
36    newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
37}
38
39/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
40        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
41        const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
42    newRootDicNode->initAsRootWithPreviousWord(
43            prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
44}
45
46/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
47        DicNode *const destDicNode) {
48    destDicNode->initByCopy(srcDicNode);
49}
50
///////////////////////////////////
// Traverse node expansion utils //
///////////////////////////////////
54/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
55        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
56        DicNodeVector *const childDicNodes) {
57    if (dicNode->isTotalInputSizeExceedingLimit()) {
58        return;
59    }
60    if (!dicNode->isLeavingNode()) {
61        childDicNodes->pushPassingChild(dicNode);
62    } else {
63        dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
64    }
65}
66
///////////////////
// Scoring utils //
///////////////////
70/**
71 * Computes the combined bigram / unigram cost for the given dicNode.
72 */
73/* static */ float DicNodeUtils::getBigramNodeImprobability(
74        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
75        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
76    if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
77        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
78    }
79    const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
80            multiBigramMap);
81    // TODO: This equation to calculate the improbability looks unreasonable.  Investigate this.
82    const float cost = static_cast<float>(MAX_PROBABILITY - probability)
83            / static_cast<float>(MAX_PROBABILITY);
84    return cost;
85}
86
87/* static */ int DicNodeUtils::getBigramNodeProbability(
88        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
89        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
90    const int unigramProbability = dicNode->getProbability();
91    const int ptNodePos = dicNode->getPtNodePos();
92    const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
93    if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
94        // Note: Normally wordPos comes from the dictionary and should never equal
95        // NOT_A_VALID_WORD_POS.
96        return dictionaryStructurePolicy->getProbability(unigramProbability,
97                NOT_A_PROBABILITY);
98    }
99    if (multiBigramMap) {
100        return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
101                prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
102    }
103    return dictionaryStructurePolicy->getProbability(unigramProbability,
104            NOT_A_PROBABILITY);
105}
106
////////////////
// Char utils //
////////////////
110
111// TODO: Move to char_utils?
112/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
113        const int *const src1, const int16_t length1, int *const dest) {
114    int actualLength0 = 0;
115    for (int i = 0; i < length0; ++i) {
116        if (src0[i] == 0) {
117            break;
118        }
119        actualLength0 = i + 1;
120    }
121    actualLength0 = std::min(actualLength0, MAX_WORD_LENGTH);
122    memmove(dest, src0, actualLength0 * sizeof(dest[0]));
123    if (!src1 || length1 == 0) {
124        return actualLength0;
125    }
126    int actualLength1 = 0;
127    for (int i = 0; i < length1; ++i) {
128        if (src1[i] == 0) {
129            break;
130        }
131        actualLength1 = i + 1;
132    }
133    actualLength1 = std::min(actualLength1, MAX_WORD_LENGTH - actualLength0);
134    memmove(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0]));
135    return actualLength0 + actualLength1;
136}
137} // namespace latinime
138