138c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/* 238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * Copyright (C) 2012 The Android Open Source Project 338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * 438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * Licensed under the Apache License, Version 2.0 (the "License"); 538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * you may not use this file except in compliance with the License. 638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * You may obtain a copy of the License at 738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * 838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * http://www.apache.org/licenses/LICENSE-2.0 938c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * 1038c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * Unless required by applicable law or agreed to in writing, software 1138c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * distributed under the License is distributed on an "AS IS" BASIS, 1238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * See the License for the specific language governing permissions and 1438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * limitations under the License. 1538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka */ 1638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 177a06a792871c38517264fcb63b80a9c09bfe4766Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node_utils.h" 187a06a792871c38517264fcb63b80a9c09bfe4766Keisuke Kuroynagi 1988bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/dictionary_structure_with_buffer_policy.h" 20a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dicnode/dic_node.h" 21a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dicnode/dic_node_vector.h" 2238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 2338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataokanamespace latinime { 2438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 2538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/////////////////////////////// 2638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka// Node initialization utils // 2738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/////////////////////////////// 2838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 29d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi/* static */ void DicNodeUtils::initAsRoot( 30d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 31537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi const WordIdArrayView prevWordIds, DicNode *const newRootDicNode) { 3289a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordIds); 3338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka} 3438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 350ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi/*static */ void DicNodeUtils::initAsRootWithPreviousWord( 36d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 372fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) { 382fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa newRootDicNode->initAsRootWithPreviousWord( 392fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition()); 4038c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka} 4138c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 422fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode, 432fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa DicNode *const destDicNode) { 442fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa destDicNode->initByCopy(srcDicNode); 4538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka} 4638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 4738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/////////////////////////////////// 4838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka// Traverse node expansion utils // 4938c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/////////////////////////////////// 50d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ void DicNodeUtils::getAllChildDicNodes(const DicNode *dicNode, 51d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 522fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa DicNodeVector *const childDicNodes) { 5338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka if (dicNode->isTotalInputSizeExceedingLimit()) { 5438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka return; 5538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka } 5638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka if (!dicNode->isLeavingNode()) { 577fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi childDicNodes->pushPassingChild(dicNode); 5838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka } else { 592fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes); 6038c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka } 6138c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka} 6238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 6338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/////////////////// 6438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka// Scoring utils // 6538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/////////////////// 6638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka/** 6738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka * Computes the combined bigram / unigram cost for the given dicNode. 6838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka */ 690ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi/* static */ float DicNodeUtils::getBigramNodeImprobability( 70d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { 722fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { 7338c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); 7438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka } 752111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext( 769f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap); 77090c3819d7d4481ebb16a220225e2106f2f1b2a2Keisuke Kuroyanagi if (wordAttributes.getProbability() == NOT_A_PROBABILITY 78090c3819d7d4481ebb16a220225e2106f2f1b2a2Keisuke Kuroyanagi || (dicNode->hasMultipleWords() 79090c3819d7d4481ebb16a220225e2106f2f1b2a2Keisuke Kuroyanagi && (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()))) { 8087a5c76906bae9546189888fa009ce0032ddad0fKeisuke Kuroyanagi return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); 8187a5c76906bae9546189888fa009ce0032ddad0fKeisuke Kuroyanagi } 8238c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. 832111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability()) 8438c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka / static_cast<float>(MAX_PROBABILITY); 8538c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka return cost; 8638c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka} 8738c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka 8838c26dd0bf8cd5c4511e4a02d5eeae4b3553f03aSatoshi Kataoka} // namespace latinime 89