1c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi/*
2c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Copyright (C) 2013, The Android Open Source Project
3c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
4c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Licensed under the Apache License, Version 2.0 (the "License");
5c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * you may not use this file except in compliance with the License.
6c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * You may obtain a copy of the License at
7c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
8c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *     http://www.apache.org/licenses/LICENSE-2.0
9c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
10c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Unless required by applicable law or agreed to in writing, software
11c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * distributed under the License is distributed on an "AS IS" BASIS,
12c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * See the License for the specific language governing permissions and
14c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * limitations under the License.
15c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi */
16c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
17c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
18c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
19c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
20c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "defines.h"
21c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node.h"
22c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node_vector.h"
23647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
2465d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
25c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
26c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynaginamespace latinime {
27c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
28c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagivoid PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
297fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        DicNodeVector *const childDicNodes) const {
301fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    if (!dicNode->hasChildren()) {
311fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi        return;
321fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    }
331fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    int nextPos = dicNode->getChildrenPos();
34009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi    if (nextPos < 0 || nextPos >= mDictBufferSize) {
35009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi        AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
36009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi                nextPos, mDictBufferSize);
37009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi        ASSERT(false);
38009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi        return;
39009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi    }
4027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
41e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi            mDictRoot, &nextPos);
421fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    for (int i = 0; i < childCount; i++) {
43009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi        if (nextPos < 0 || nextPos >= mDictBufferSize) {
44009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi            AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
45009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi                    nextPos, mDictBufferSize, i, childCount);
46009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi            ASSERT(false);
47009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi            return;
48009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi        }
497fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
501fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    }
51c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi}
52c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
53381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// This retrieves code points and the probability of the word by its terminal position.
54381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
55381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// it is possible to check for this with advantageous complexity. For each node, we search
5627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// for PtNodes with children and compare the children position with the position we look for.
57381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// When we shoot the position we look for, it means the word we look for is in the children
5827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
5927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// PtNode array with the last PtNode's children position still less than what we are searching for,
6027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// we must descend the last PtNode's children (for example, if the word we are searching for starts
6127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
62381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// than the position we look for, and we have to descend the z node).
63381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi/* Parameters :
6477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
65381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi *   what is stored as the "bigram position" in each bigram)
66381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
67381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * outUnigramProbability: a pointer to an int to write the probability into.
68381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * Return value : the code point count, of 0 if the word was not found.
69381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi */
70381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// TODO: Split this function to be more readable
711311cdcb6233abde792a9d9fdd294334c9be7043Keisuke Kuroynagiint PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
7277ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
73c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        int *const outUnigramProbability) const {
74381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    int pos = getRootPosition();
75381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    int wordPos = 0;
7627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
7727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    // only traverse nodes that are actually a part of the terminal we are searching, so each time
78381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    // we enter this loop we are one depth level further than last time.
79381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    // The only reason we count nodes is because we want to reduce the probability of infinite
80381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    // looping in case there is a bug. Since we know there is an upper bound to the depth we are
81381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    // supposed to traverse, it does not hurt to count iterations.
82381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
8327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi        int lastCandidatePtNodePos = 0;
8427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi        // Let's loop through PtNodes in this PtNode array searching for either the terminal
85381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        // or one of its ascendants.
8627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi        for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
8727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
88381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const int startPos = pos;
89381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const PatriciaTrieReadingUtils::NodeFlags flags =
90381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
91381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
92381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    mDictRoot, &pos);
9377ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi            if (ptNodePos == startPos) {
94381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // We found the position. Copy the rest of the code points in the buffer and return
95381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // the length.
96381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                outCodePoints[wordPos] = character;
97381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
98381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
99381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            mDictRoot, &pos);
100381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // We count code points in order to avoid infinite loops if the file is broken
101381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // or if there is some other bug
102381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    int charCount = maxCodePointCount;
103381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
104381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        outCodePoints[++wordPos] = nextChar;
105381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
106381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                                mDictRoot, &pos);
107381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
108381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
109381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                *outUnigramProbability =
110381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
111381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                                &pos);
112381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                return ++wordPos;
113381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
11427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            // We need to skip past this PtNode, so skip any remaining code points after the
115381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // first and possibly the probability.
116381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
117381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
118381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
119381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (PatriciaTrieReadingUtils::isTerminal(flags)) {
120381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
121381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
12227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            // The fact that this PtNode has children is very important. Since we already know
12327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            // that this PtNode does not match, if it has no children we know it is irrelevant
124381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // to what we are searching for.
125381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
126381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // We will write in `found' whether we have passed the children position we are
127381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // searching for. For example if we search for "beer", the children of b are less
128381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // than the address we are searching for and the children of c are greater. When we
129381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // come here for c, we realize this is too big, and that we should descend b.
130381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            bool found;
131381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (hasChildren) {
132381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                int currentPos = pos;
133381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // Here comes the tricky part. First, read the children position.
134381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                const int childrenPos = PatriciaTrieReadingUtils
135381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
13677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi                if (childrenPos > ptNodePos) {
137381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // If the children pos is greater than the position, it means the previous
13827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // PtNode, which position is stored in lastCandidatePtNodePos, was the right
139381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // one.
140381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    found = true;
14127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                } else if (1 >= ptNodeCount) {
14227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // However if we are on the LAST PtNode of this array, and we have NOT shot the
14327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // position we should descend THIS node. So we trick the lastCandidatePtNodePos
14427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // so that we will descend this PtNode, not the previous one.
14527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    lastCandidatePtNodePos = startPos;
146381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    found = true;
147381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                } else {
148381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // Else, we should continue looking.
149381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    found = false;
150381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
151381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            } else {
15227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // Even if we don't have children here, we could still be on the last PtNode of /
15327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // this array. If this is the case, we should descend the last PtNode that had
15427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // children, and their position is already in lastCandidatePtNodePos.
15527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                found = (1 >= ptNodeCount);
156381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
157381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi
158381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (found) {
15927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // Okay, we found the PtNode we should descend. Its position is in
16027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // the lastCandidatePtNodePos variable, so we just re-read it.
16127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                if (0 != lastCandidatePtNodePos) {
162381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    const PatriciaTrieReadingUtils::NodeFlags lastFlags =
163381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
16427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                                    mDictRoot, &lastCandidatePtNodePos);
165381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
16627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                            mDictRoot, &lastCandidatePtNodePos);
16727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // We copy all the characters in this PtNode to the buffer
168381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    outCodePoints[wordPos] = lastChar;
169381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
170381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
17127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                                mDictRoot, &lastCandidatePtNodePos);
172381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        int charCount = maxCodePointCount;
173381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        while (-1 != nextChar && --charCount > 0) {
174381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            outCodePoints[++wordPos] = nextChar;
175381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
17627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                                    mDictRoot, &lastCandidatePtNodePos);
177381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        }
178381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
179381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    ++wordPos;
180381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // Now we only need to branch to the children address. Skip the probability if
181381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // it's there, read pos, and break to resume the search at pos.
182381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
183381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
18427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                                &lastCandidatePtNodePos);
185381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
186381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
18727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                            mDictRoot, lastFlags, &lastCandidatePtNodePos);
188381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    break;
189381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                } else {
190381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // Here is a little tricky part: we come here if we found out that all children
19127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // addresses in this PtNode are bigger than the address we are searching for.
192381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // Should we conclude the word is not in the dictionary? No! It could still be
19327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // one of the remaining PtNodes in this array, so we have to keep looking in
19427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // this array until we find it (or we realize it's not there either, in which
19527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // case it's actually not in the dictionary). Pass the end of this PtNode,
19627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // ready to start the next one.
197381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
198381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
199381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                                mDictRoot, flags, &pos);
200381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
201381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
202381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        mShortcutListPolicy.skipAllShortcuts(&pos);
203381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
204381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
205381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        mBigramListPolicy.skipAllBigrams(&pos);
206381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
207381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
208381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            } else {
209381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // If we did not find it, we should record the last children address for the next
210381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // iteration.
21127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                if (hasChildren) lastCandidatePtNodePos = startPos;
21227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // Now skip the end of this PtNode (children pos and the attributes if any) so that
21327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // our pos is after the end of this PtNode, at the start of the next one.
214381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
215381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
216381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            mDictRoot, flags, &pos);
217381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
218381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
219381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    mShortcutListPolicy.skipAllShortcuts(&pos);
220381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
221381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
222381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    mBigramListPolicy.skipAllBigrams(&pos);
223381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
224381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
225381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi
226381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        }
227381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    }
22877ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    // If we have looked through all the PtNodes and found no match, the ptNodePos is
229381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    // not the position of a terminal in this dictionary.
230381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    return 0;
231c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi}
232c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
233381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// This function gets the position of the terminal node of the exact matching word in the
234cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi// dictionary. If no match is found, it returns NOT_A_DICT_POS.
235e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagiint PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
236c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        const int length, const bool forceLowerCaseSearch) const {
237381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    int pos = getRootPosition();
238381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    int wordPos = 0;
239381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi
240381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    while (true) {
241381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        // If we already traversed the tree further than the word is long, there means
242381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        // there was no match (or we would have found it).
243cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi        if (wordPos >= length) return NOT_A_DICT_POS;
24427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi        int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
245381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                &pos);
246381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        const int wChar = forceLowerCaseSearch
247381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
248381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        while (true) {
24927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            // If there are no more PtNodes in this array, it means we could not
250381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // find a matching character for this depth, therefore there is no match.
251cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi            if (0 >= ptNodeCount) return NOT_A_DICT_POS;
25227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            const int ptNodePos = pos;
253381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const PatriciaTrieReadingUtils::NodeFlags flags =
254381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
255381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
256381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    &pos);
257381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (character == wChar) {
25827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // This is the correct PtNode. Only one PtNode may start with the same char within
25927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // a PtNode array, so either we found our match in this array, or there is
260cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi                // no match and we can return NOT_A_DICT_POS. So we will check all the
26127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // characters in this PtNode indeed does match.
262381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
263381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
264381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            &pos);
265381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    while (NOT_A_CODE_POINT != character) {
266381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        ++wordPos;
267381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        // If we shoot the length of the word we search for, or if we find a single
268381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        // character that does not match, as explained above, it means the word is
26927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                        // not in the dictionary (by virtue of this PtNode being the only one to
270381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        // match the word on the first character, but not matching the whole word).
271cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi                        if (wordPos >= length) return NOT_A_DICT_POS;
272cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi                        if (inWord[wordPos] != character) return NOT_A_DICT_POS;
273381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
274381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                                mDictRoot, &pos);
275381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
276381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
277381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // If we come here we know that so far, we do match. Either we are on a terminal
278381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // and we match the length, in which case we found it, or we traverse children.
279381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // If we don't match the length AND don't have children, then a word in the
280381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // dictionary fully matches a prefix of the searched word but not the full word.
281381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                ++wordPos;
282381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::isTerminal(flags)) {
283381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (wordPos == length) {
28427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                        return ptNodePos;
285381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
286381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
287381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
288381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
289cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi                    return NOT_A_DICT_POS;
290381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
291381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // We have children and we are still shorter than the word we are searching for, so
292381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // we need to traverse children. Put the pointer on the children position, and
293381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // break
294381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
295381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        flags, &pos);
296381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                break;
297381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            } else {
29827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // This PtNode does not match, so skip the remaining part and go to the next.
299381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
300381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
301381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            &pos);
302381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
303381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::isTerminal(flags)) {
304381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
305381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
306381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
307381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
308381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            flags, &pos);
309381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
310381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
311381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    mShortcutListPolicy.skipAllShortcuts(&pos);
312381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
313381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
314381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    mBigramListPolicy.skipAllBigrams(&pos);
315381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
316381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
31727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            --ptNodeCount;
318381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        }
319381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    }
320c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi}
321c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
32265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagiint PatriciaTriePolicy::getProbability(const int unigramProbability,
32365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        const int bigramProbability) const {
32465d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    if (unigramProbability == NOT_A_PROBABILITY) {
32565d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        return NOT_A_PROBABILITY;
32665d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    } else if (bigramProbability == NOT_A_PROBABILITY) {
32765d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        return ProbabilityUtils::backoff(unigramProbability);
32865d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    } else {
32965d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
33065d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi                bigramProbability);
33165d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    }
33265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi}
33365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi
33477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
33577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    if (ptNodePos == NOT_A_DICT_POS) {
336647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_PROBABILITY;
337647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
33877ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    int pos = ptNodePos;
339647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    const PatriciaTrieReadingUtils::NodeFlags flags =
340e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
341647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
342647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_PROBABILITY;
343647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
344647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (PatriciaTrieReadingUtils::isNotAWord(flags)
345647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi            || PatriciaTrieReadingUtils::isBlacklisted(flags)) {
346c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        // If this is not a word, or if it's a blacklisted entry, it should behave as
347c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        // having no probability outside of the suggestion process (where it should be used
348c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        // for shortcuts).
349c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        return NOT_A_PROBABILITY;
350c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi    }
351e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
35265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
35365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi            mDictRoot, &pos), NOT_A_PROBABILITY);
354c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi}
355c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
35677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
35777ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    if (ptNodePos == NOT_A_DICT_POS) {
358647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_DICT_POS;
359647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
36077ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    int pos = ptNodePos;
361647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    const PatriciaTrieReadingUtils::NodeFlags flags =
362e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
363647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
364647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_DICT_POS;
365647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
366e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
367647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
368e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
369647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
370647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
371e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
372647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
373647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    return pos;
374c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi}
375c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi
37677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
37777ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    if (ptNodePos == NOT_A_DICT_POS) {
378647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_DICT_POS;
379647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
38077ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    int pos = ptNodePos;
381647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    const PatriciaTrieReadingUtils::NodeFlags flags =
382e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
383647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
384647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_DICT_POS;
385647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
386e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
387647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
388e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
389647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
390647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
391e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
392647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
393647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
394fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi        mShortcutListPolicy.skipAllShortcuts(&pos);;
395647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
396647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    return pos;
397c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi}
398c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi
399647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagiint PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
40077ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi        const int ptNodePos, DicNodeVector *childDicNodes) const {
40177ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    int pos = ptNodePos;
402647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    const PatriciaTrieReadingUtils::NodeFlags flags =
403e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
4041fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    int mergedNodeCodePoints[MAX_WORD_LENGTH];
405647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
406e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi            mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
407647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
408e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi            PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
409647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi                    : NOT_A_PROBABILITY;
410647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
411647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi            PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
412e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi                    mDictRoot, flags, &pos) : NOT_A_DICT_POS;
413647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
414fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi        getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
415647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
416647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
417668870be431d17ee4ceb5ce161aee1189063af18Keisuke Kuroyanagi        getBigramsStructurePolicy()->skipAllBigrams(&pos);
418647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
4199155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi    if (mergedNodeCodePointCount <= 0) {
4209155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi        AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
4219155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi        ASSERT(false);
4229155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi        return pos;
4239155eec0d9a6749879b413a22f30ede2e170ce19Keisuke Kuroyanagi    }
42477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
4257fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            PatriciaTrieReadingUtils::isTerminal(flags),
4267fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
4277fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            PatriciaTrieReadingUtils::isBlacklisted(flags) ||
4287fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::isNotAWord(flags),
4297fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            mergedNodeCodePointCount, mergedNodeCodePoints);
430647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    return pos;
4311fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi}
4321fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi
433c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} // namespace latinime
434