138c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/*
238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * Copyright (C) 2012 The Android Open Source Project
338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka *
438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * Licensed under the Apache License, Version 2.0 (the "License");
538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * you may not use this file except in compliance with the License.
638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * You may obtain a copy of the License at
738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka *
838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka *      http://www.apache.org/licenses/LICENSE-2.0
938c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka *
1038c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * Unless required by applicable law or agreed to in writing, software
1138c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * distributed under the License is distributed on an "AS IS" BASIS,
1238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * See the License for the specific language governing permissions and
1438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * limitations under the License.
1538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka */
1638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
177a06a792871c38517264fcb63b80a9c09bfe4766Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node_utils.h"
187a06a792871c38517264fcb63b80a9c09bfe4766Keisuke Kuroynagi
1988bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
20a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dicnode/dic_node.h"
21a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dicnode/dic_node_vector.h"
2238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
2338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataokanamespace latinime {
2438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
2538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka///////////////////////////////
2638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka// Node initialization utils //
2738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka///////////////////////////////
2838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
29d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi/* static */ void DicNodeUtils::initAsRoot(
30d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
31537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi        const WordIdArrayView prevWordIds, DicNode *const newRootDicNode) {
3289a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordIds);
3338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka}
3438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
350ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
36d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
372fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
382fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    newRootDicNode->initAsRootWithPreviousWord(
392fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
4038c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka}
4138c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
422fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
432fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        DicNode *const destDicNode) {
442fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    destDicNode->initByCopy(srcDicNode);
4538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka}
4638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
4738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka///////////////////////////////////
4838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka// Traverse node expansion utils //
4938c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka///////////////////////////////////
50d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ void DicNodeUtils::getAllChildDicNodes(const DicNode *dicNode,
51d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
522fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        DicNodeVector *const childDicNodes) {
5338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka    if (dicNode->isTotalInputSizeExceedingLimit()) {
5438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka        return;
5538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka    }
5638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka    if (!dicNode->isLeavingNode()) {
577fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        childDicNodes->pushPassingChild(dicNode);
5838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka    } else {
592fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
6038c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka    }
6138c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka}
6238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
6338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka///////////////////
6438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka// Scoring utils //
6538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka///////////////////
6638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/**
6738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * Computes the combined bigram / unigram cost for the given dicNode.
6838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka */
690ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi/* static */ float DicNodeUtils::getBigramNodeImprobability(
70d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
722fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
7338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
7438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka    }
752111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi    const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext(
769f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi            dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
77090c3819d7d4481ebb16a220225e2106f2f1b2a2Keisuke Kuroyanagi    if (wordAttributes.getProbability() == NOT_A_PROBABILITY
78090c3819d7d4481ebb16a220225e2106f2f1b2a2Keisuke Kuroyanagi            || (dicNode->hasMultipleWords()
79090c3819d7d4481ebb16a220225e2106f2f1b2a2Keisuke Kuroyanagi                    && (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()))) {
8087a5c76906bae9546189888fa009ce0032ddad0fKeisuke Kuroyanagi        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
8187a5c76906bae9546189888fa009ce0032ddad0fKeisuke Kuroyanagi    }
8238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka    // TODO: This equation to calculate the improbability looks unreasonable.  Investigate this.
832111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi    const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability())
8438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka            / static_cast<float>(MAX_PROBABILITY);
8538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka    return cost;
8638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka}
8738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka
8838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka} // namespace latinime
89