1c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi/* 2c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Copyright (C) 2013, The Android Open Source Project 3c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * 4c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Licensed under the Apache License, Version 2.0 (the "License"); 5c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * you may not use this file except in compliance with the License. 6c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * You may obtain a copy of the License at 7c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * 8c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * http://www.apache.org/licenses/LICENSE-2.0 9c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * 10c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Unless required by applicable law or agreed to in writing, software 11c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * distributed under the License is distributed on an "AS IS" BASIS, 12c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * See the License for the specific language governing permissions and 14c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * limitations under the License. 15c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi */ 16c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 17c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 18c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" 19c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 20c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "defines.h" 21c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node.h" 22c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node_vector.h" 23647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" 2465d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi#include "suggest/policyimpl/dictionary/utils/probability_utils.h" 25c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 26c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynaginamespace latinime { 27c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 28c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagivoid PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, 297fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi DicNodeVector *const childDicNodes) const { 301fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi if (!dicNode->hasChildren()) { 311fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi return; 321fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi } 331fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi int nextPos = dicNode->getChildrenPos(); 34009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi if (nextPos < 0 || nextPos >= mDictBufferSize) { 35009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", 36009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi nextPos, mDictBufferSize); 37009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi ASSERT(false); 38009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi return; 39009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi } 4027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( 41e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi mDictRoot, &nextPos); 421fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi for (int i = 0; i < childCount; i++) { 43009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi if (nextPos < 0 || nextPos >= mDictBufferSize) { 44009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d", 45009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi nextPos, mDictBufferSize, i, childCount); 46009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi ASSERT(false); 47009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi return; 48009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi } 497fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); 501fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi } 51c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} 52c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 53381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// This retrieves code points and the probability of the word by its terminal position. 54381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// Due to the fact that words are ordered in the dictionary in a strict breadth-first order, 55381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// it is possible to check for this with advantageous complexity. For each node, we search 5627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// for PtNodes with children and compare the children position with the position we look for. 57381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// When we shoot the position we look for, it means the word we look for is in the children 5827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a 5927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// PtNode array with the last PtNode's children position still less than what we are searching for, 6027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// we must descend the last PtNode's children (for example, if the word we are searching for starts 6127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// with a z, it's the last PtNode of the root array, so all children addresses will be smaller 62381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// than the position we look for, and we have to descend the z node). 63381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi/* Parameters : 6477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is 65381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * what is stored as the "bigram position" in each bigram) 66381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size. 67381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * outUnigramProbability: a pointer to an int to write the probability into. 68381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * Return value : the code point count, of 0 if the word was not found. 69381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi */ 70381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// TODO: Split this function to be more readable 711311cdcb6233abde792a9d9fdd294334c9be7043Keisuke Kuroynagiint PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( 7277ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, 73c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi int *const outUnigramProbability) const { 74381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int pos = getRootPosition(); 75381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int wordPos = 0; 7627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will 7727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // only traverse nodes that are actually a part of the terminal we are searching, so each time 78381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // we enter this loop we are one depth level further than last time. 79381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // The only reason we count nodes is because we want to reduce the probability of infinite 80381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // looping in case there is a bug. Since we know there is an upper bound to the depth we are 81381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // supposed to traverse, it does not hurt to count iterations. 82381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { 8327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi int lastCandidatePtNodePos = 0; 8427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // Let's loop through PtNodes in this PtNode array searching for either the terminal 85381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // or one of its ascendants. 8627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( 8727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) { 88381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int startPos = pos; 89381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const PatriciaTrieReadingUtils::NodeFlags flags = 90381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); 91381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 92381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mDictRoot, &pos); 9377ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == startPos) { 94381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // We found the position. Copy the rest of the code points in the buffer and return 95381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // the length. 96381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi outCodePoints[wordPos] = character; 97381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { 98381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 99381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mDictRoot, &pos); 100381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // We count code points in order to avoid infinite loops if the file is broken 101381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // or if there is some other bug 102381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int charCount = maxCodePointCount; 103381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi while (NOT_A_CODE_POINT != nextChar && --charCount > 0) { 104381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi outCodePoints[++wordPos] = nextChar; 105381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 106381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mDictRoot, &pos); 107381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 108381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 109381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi *outUnigramProbability = 110381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, 111381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi &pos); 112381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi return ++wordPos; 113381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 11427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // We need to skip past this PtNode, so skip any remaining code points after the 115381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // first and possibly the probability. 116381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { 117381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); 118381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 119381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::isTerminal(flags)) { 120381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); 121381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 12227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // The fact that this PtNode has children is very important. Since we already know 12327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // that this PtNode does not match, if it has no children we know it is irrelevant 124381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // to what we are searching for. 125381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags); 126381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // We will write in `found' whether we have passed the children position we are 127381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // searching for. For example if we search for "beer", the children of b are less 128381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // than the address we are searching for and the children of c are greater. When we 129381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // come here for c, we realize this is too big, and that we should descend b. 130381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi bool found; 131381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (hasChildren) { 132381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int currentPos = pos; 133381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Here comes the tricky part. First, read the children position. 134381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int childrenPos = PatriciaTrieReadingUtils 135381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, ¤tPos); 13677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (childrenPos > ptNodePos) { 137381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // If the children pos is greater than the position, it means the previous 13827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // PtNode, which position is stored in lastCandidatePtNodePos, was the right 139381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // one. 140381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi found = true; 14127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi } else if (1 >= ptNodeCount) { 14227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // However if we are on the LAST PtNode of this array, and we have NOT shot the 14327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // position we should descend THIS node. So we trick the lastCandidatePtNodePos 14427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // so that we will descend this PtNode, not the previous one. 14527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi lastCandidatePtNodePos = startPos; 146381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi found = true; 147381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 148381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Else, we should continue looking. 149381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi found = false; 150381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 151381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 15227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // Even if we don't have children here, we could still be on the last PtNode of / 15327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // this array. If this is the case, we should descend the last PtNode that had 15427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // children, and their position is already in lastCandidatePtNodePos. 15527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi found = (1 >= ptNodeCount); 156381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 157381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi 158381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (found) { 15927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // Okay, we found the PtNode we should descend. Its position is in 16027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // the lastCandidatePtNodePos variable, so we just re-read it. 16127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi if (0 != lastCandidatePtNodePos) { 162381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const PatriciaTrieReadingUtils::NodeFlags lastFlags = 163381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition( 16427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi mDictRoot, &lastCandidatePtNodePos); 165381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 16627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi mDictRoot, &lastCandidatePtNodePos); 16727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // We copy all the characters in this PtNode to the buffer 168381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi outCodePoints[wordPos] = lastChar; 169381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) { 170381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 17127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi mDictRoot, &lastCandidatePtNodePos); 172381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int charCount = maxCodePointCount; 173381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi while (-1 != nextChar && --charCount > 0) { 174381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi outCodePoints[++wordPos] = nextChar; 175381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 17627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi mDictRoot, &lastCandidatePtNodePos); 177381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 178381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 179381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi ++wordPos; 180381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Now we only need to branch to the children address. Skip the probability if 181381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // it's there, read pos, and break to resume the search at pos. 182381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) { 183381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, 18427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi &lastCandidatePtNodePos); 185381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 186381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 18727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi mDictRoot, lastFlags, &lastCandidatePtNodePos); 188381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi break; 189381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 190381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Here is a little tricky part: we come here if we found out that all children 19127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // addresses in this PtNode are bigger than the address we are searching for. 192381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Should we conclude the word is not in the dictionary? No! It could still be 19327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // one of the remaining PtNodes in this array, so we have to keep looking in 19427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // this array until we find it (or we realize it's not there either, in which 19527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // case it's actually not in the dictionary). Pass the end of this PtNode, 19627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // ready to start the next one. 197381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 198381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 199381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mDictRoot, flags, &pos); 200381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 201381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 202381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mShortcutListPolicy.skipAllShortcuts(&pos); 203381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 204381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasBigrams(flags)) { 205381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mBigramListPolicy.skipAllBigrams(&pos); 206381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 207381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 208381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 209381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // If we did not find it, we should record the last children address for the next 210381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // iteration. 21127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi if (hasChildren) lastCandidatePtNodePos = startPos; 21227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // Now skip the end of this PtNode (children pos and the attributes if any) so that 21327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // our pos is after the end of this PtNode, at the start of the next one. 214381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 215381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 216381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mDictRoot, flags, &pos); 217381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 218381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 219381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mShortcutListPolicy.skipAllShortcuts(&pos); 220381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 221381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasBigrams(flags)) { 222381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mBigramListPolicy.skipAllBigrams(&pos); 223381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 224381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 225381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi 226381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 227381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 22877ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi // If we have looked through all the PtNodes and found no match, the ptNodePos is 229381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // not the position of a terminal in this dictionary. 230381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi return 0; 231c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} 232c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 233381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// This function gets the position of the terminal node of the exact matching word in the 234cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi// dictionary. If no match is found, it returns NOT_A_DICT_POS. 235e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagiint PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, 236c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi const int length, const bool forceLowerCaseSearch) const { 237381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int pos = getRootPosition(); 238381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int wordPos = 0; 239381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi 240381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi while (true) { 241381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // If we already traversed the tree further than the word is long, there means 242381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // there was no match (or we would have found it). 243cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (wordPos >= length) return NOT_A_DICT_POS; 24427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, 245381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi &pos); 246381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int wChar = forceLowerCaseSearch 247381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; 248381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi while (true) { 24927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // If there are no more PtNodes in this array, it means we could not 250381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // find a matching character for this depth, therefore there is no match. 251cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (0 >= ptNodeCount) return NOT_A_DICT_POS; 25227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi const int ptNodePos = pos; 253381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const PatriciaTrieReadingUtils::NodeFlags flags = 254381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); 255381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, 256381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi &pos); 257381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (character == wChar) { 25827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // This is the correct PtNode. Only one PtNode may start with the same char within 25927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // a PtNode array, so either we found our match in this array, or there is 260cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi // no match and we can return NOT_A_DICT_POS. So we will check all the 26127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // characters in this PtNode indeed does match. 262381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { 263381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, 264381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi &pos); 265381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi while (NOT_A_CODE_POINT != character) { 266381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi ++wordPos; 267381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // If we shoot the length of the word we search for, or if we find a single 268381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // character that does not match, as explained above, it means the word is 26927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // not in the dictionary (by virtue of this PtNode being the only one to 270381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // match the word on the first character, but not matching the whole word). 271cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (wordPos >= length) return NOT_A_DICT_POS; 272cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (inWord[wordPos] != character) return NOT_A_DICT_POS; 273381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 274381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mDictRoot, &pos); 275381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 276381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 277381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // If we come here we know that so far, we do match. Either we are on a terminal 278381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // and we match the length, in which case we found it, or we traverse children. 279381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // If we don't match the length AND don't have children, then a word in the 280381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // dictionary fully matches a prefix of the searched word but not the full word. 281381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi ++wordPos; 282381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::isTerminal(flags)) { 283381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (wordPos == length) { 28427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi return ptNodePos; 285381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 286381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); 287381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 288381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 289cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi return NOT_A_DICT_POS; 290381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 291381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // We have children and we are still shorter than the word we are searching for, so 292381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // we need to traverse children. Put the pointer on the children position, and 293381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // break 294381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, 295381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi flags, &pos); 296381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi break; 297381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 29827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // This PtNode does not match, so skip the remaining part and go to the next. 299381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { 300381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, 301381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi &pos); 302381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 303381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::isTerminal(flags)) { 304381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); 305381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 306381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 307381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, 308381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi flags, &pos); 309381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 310381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 311381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mShortcutListPolicy.skipAllShortcuts(&pos); 312381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 313381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasBigrams(flags)) { 314381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mBigramListPolicy.skipAllBigrams(&pos); 315381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 316381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 31727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi --ptNodeCount; 318381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 319381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 320c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} 321c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 32265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagiint PatriciaTriePolicy::getProbability(const int unigramProbability, 32365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi const int bigramProbability) const { 32465d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi if (unigramProbability == NOT_A_PROBABILITY) { 32565d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return NOT_A_PROBABILITY; 32665d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi } else if (bigramProbability == NOT_A_PROBABILITY) { 32765d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return ProbabilityUtils::backoff(unigramProbability); 32865d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi } else { 32965d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, 33065d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi bigramProbability); 33165d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi } 33265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi} 33365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi 33477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { 33577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == NOT_A_DICT_POS) { 336647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_PROBABILITY; 337647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 33877ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi int pos = ptNodePos; 339647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi const PatriciaTrieReadingUtils::NodeFlags flags = 340e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); 341647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (!PatriciaTrieReadingUtils::isTerminal(flags)) { 342647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_PROBABILITY; 343647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 344647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (PatriciaTrieReadingUtils::isNotAWord(flags) 345647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi || PatriciaTrieReadingUtils::isBlacklisted(flags)) { 346c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi // If this is not a word, or if it's a blacklisted entry, it should behave as 347c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi // having no probability outside of the suggestion process (where it should be used 348c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi // for shortcuts). 349c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi return NOT_A_PROBABILITY; 350c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi } 351e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); 35265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition( 35365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi mDictRoot, &pos), NOT_A_PROBABILITY); 354c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} 355c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 35677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { 35777ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == NOT_A_DICT_POS) { 358647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_DICT_POS; 359647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 36077ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi int pos = ptNodePos; 361647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi const PatriciaTrieReadingUtils::NodeFlags flags = 362e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); 363647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 364647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_DICT_POS; 365647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 366e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); 367647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (PatriciaTrieReadingUtils::isTerminal(flags)) { 368e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); 369647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 370647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 371e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); 372647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 373647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return pos; 374c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi} 375c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi 37677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { 37777ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == NOT_A_DICT_POS) { 378647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_DICT_POS; 379647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 38077ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi int pos = ptNodePos; 381647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi const PatriciaTrieReadingUtils::NodeFlags flags = 382e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); 383647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (!PatriciaTrieReadingUtils::hasBigrams(flags)) { 384647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_DICT_POS; 385647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 386e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); 387647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (PatriciaTrieReadingUtils::isTerminal(flags)) { 388e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); 389647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 390647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 391e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); 392647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 393647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 394fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi mShortcutListPolicy.skipAllShortcuts(&pos);; 395647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 396647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return pos; 397c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi} 398c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi 399647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagiint PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, 40077ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi const int ptNodePos, DicNodeVector *childDicNodes) const { 40177ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi int pos = ptNodePos; 402647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi const PatriciaTrieReadingUtils::NodeFlags flags = 403e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); 4041fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi int mergedNodeCodePoints[MAX_WORD_LENGTH]; 405647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( 406e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); 407647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))? 408e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos) 409647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi : NOT_A_PROBABILITY; 410647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ? 411647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 412e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi mDictRoot, flags, &pos) : NOT_A_DICT_POS; 413647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 414fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi getShortcutsStructurePolicy()->skipAllShortcuts(&pos); 415647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 416647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi if (PatriciaTrieReadingUtils::hasBigrams(flags)) { 417668870be431d17ee4ceb5ce161aee1189063af18Keisuke Kuroyanagi getBigramsStructurePolicy()->skipAllBigrams(&pos); 418647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 4199155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi if (mergedNodeCodePointCount <= 0) { 4209155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); 4219155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi ASSERT(false); 4229155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi return pos; 4239155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi } 42477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, 4257fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi PatriciaTrieReadingUtils::isTerminal(flags), 4267fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi PatriciaTrieReadingUtils::hasChildrenInFlags(flags), 4277fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi PatriciaTrieReadingUtils::isBlacklisted(flags) || 4287fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi PatriciaTrieReadingUtils::isNotAWord(flags), 4297fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi mergedNodeCodePointCount, mergedNodeCodePoints); 430647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return pos; 4311fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi} 4321fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi 433c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} // namespace latinime 434