1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/core/dicnode/dic_node_utils.h"
18
19#include <cstring>
20
21#include "suggest/core/dicnode/dic_node.h"
22#include "suggest/core/dicnode/dic_node_vector.h"
23#include "suggest/core/dictionary/multi_bigram_map.h"
24#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
25#include "utils/char_utils.h"
26
27namespace latinime {
28
29///////////////////////////////
30// Node initialization utils //
31///////////////////////////////
32
33/* static */ void DicNodeUtils::initAsRoot(
34        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
35        const int prevWordNodePos, DicNode *const newRootNode) {
36    newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
37}
38
39/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
40        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
41        DicNode *const prevWordLastNode, DicNode *const newRootNode) {
42    newRootNode->initAsRootWithPreviousWord(
43            prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
44}
45
46/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
47    destNode->initByCopy(srcNode);
48}
49
50///////////////////////////////////
51// Traverse node expansion utils //
52///////////////////////////////////
53/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
54        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
55        DicNodeVector *childDicNodes) {
56    if (dicNode->isTotalInputSizeExceedingLimit()) {
57        return;
58    }
59    if (!dicNode->isLeavingNode()) {
60        childDicNodes->pushPassingChild(dicNode);
61    } else {
62        dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
63    }
64}
65
66///////////////////
67// Scoring utils //
68///////////////////
69/**
70 * Computes the combined bigram / unigram cost for the given dicNode.
71 */
72/* static */ float DicNodeUtils::getBigramNodeImprobability(
73        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
74        const DicNode *const node, MultiBigramMap *multiBigramMap) {
75    if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
76        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
77    }
78    const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
79            multiBigramMap);
80    // TODO: This equation to calculate the improbability looks unreasonable.  Investigate this.
81    const float cost = static_cast<float>(MAX_PROBABILITY - probability)
82            / static_cast<float>(MAX_PROBABILITY);
83    return cost;
84}
85
86/* static */ int DicNodeUtils::getBigramNodeProbability(
87        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
88        const DicNode *const node, MultiBigramMap *multiBigramMap) {
89    const int unigramProbability = node->getProbability();
90    const int wordPos = node->getPos();
91    const int prevWordPos = node->getPrevWordPos();
92    if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
93        // Note: Normally wordPos comes from the dictionary and should never equal
94        // NOT_A_VALID_WORD_POS.
95        return dictionaryStructurePolicy->getProbability(unigramProbability,
96                NOT_A_PROBABILITY);
97    }
98    if (multiBigramMap) {
99        return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
100                wordPos, unigramProbability);
101    }
102    return dictionaryStructurePolicy->getProbability(unigramProbability,
103            NOT_A_PROBABILITY);
104}
105
106////////////////
107// Char utils //
108////////////////
109
110// TODO: Move to char_utils?
111/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
112        const int *const src1, const int16_t length1, int *dest) {
113    int actualLength0 = 0;
114    for (int i = 0; i < length0; ++i) {
115        if (src0[i] == 0) {
116            break;
117        }
118        actualLength0 = i + 1;
119    }
120    actualLength0 = min(actualLength0, MAX_WORD_LENGTH);
121    memcpy(dest, src0, actualLength0 * sizeof(dest[0]));
122    if (!src1 || length1 == 0) {
123        return actualLength0;
124    }
125    int actualLength1 = 0;
126    for (int i = 0; i < length1; ++i) {
127        if (src1[i] == 0) {
128            break;
129        }
130        actualLength1 = i + 1;
131    }
132    actualLength1 = min(actualLength1, MAX_WORD_LENGTH - actualLength0);
133    memcpy(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0]));
134    return actualLength0 + actualLength1;
135}
136} // namespace latinime
137