1c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi/*
2c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Copyright (C) 2013, The Android Open Source Project
3c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
4c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Licensed under the Apache License, Version 2.0 (the "License");
5c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * you may not use this file except in compliance with the License.
6c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * You may obtain a copy of the License at
7c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
8c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *     http://www.apache.org/licenses/LICENSE-2.0
9c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
10c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Unless required by applicable law or agreed to in writing, software
11c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * distributed under the License is distributed on an "AS IS" BASIS,
12c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * See the License for the specific language governing permissions and
14c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * limitations under the License.
15c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi */
16c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
1788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v2/patricia_trie_policy.h"
18c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
19c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "defines.h"
20c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node.h"
21c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node_vector.h"
2288bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/ngram_listener.h"
2388bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/ngram_context.h"
2488bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
2588bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/pt_common/patricia_trie_reading_utils.h"
2688bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/binary_dictionary_bigrams_iterator.h"
2788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/multi_bigram_map.h"
2888bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/probability_utils.h"
2979ba633402ceeebe216055cbd99a9e9701460f4aKeisuke Kuroyanagi#include "utils/char_utils.h"
30c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
31c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynaginamespace latinime {
32c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
332fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasavoid PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
347fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        DicNodeVector *const childDicNodes) const {
351fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    if (!dicNode->hasChildren()) {
361fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi        return;
371fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    }
382fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    int nextPos = dicNode->getChildrenPtNodeArrayPos();
39180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi    if (!isValidPos(nextPos)) {
40180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi        AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %zd",
41180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                nextPos, mBuffer.size());
42b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi        mIsCorrupted = true;
43009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi        ASSERT(false);
44009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi        return;
45009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi    }
4627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
47180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi            mBuffer.data(), &nextPos);
481fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    for (int i = 0; i < childCount; i++) {
49180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi        if (!isValidPos(nextPos)) {
50180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi            AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %zd, childCount: %d / %d",
51180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                    nextPos, mBuffer.size(), i, childCount);
52b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi            mIsCorrupted = true;
53009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi            ASSERT(false);
54009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi            return;
55009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi        }
567fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
571fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    }
58c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi}
59c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
6065a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagiint PatriciaTriePolicy::getCodePointsAndReturnCodePointCount(const int wordId,
6165a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi        const int maxCodePointCount, int *const outCodePoints) const {
6265a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi    return getCodePointsAndProbabilityAndReturnCodePointCount(wordId, maxCodePointCount,
6365a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi            outCodePoints, nullptr /* outUnigramProbability */);
6465a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi}
6594e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi// This retrieves code points and the probability of the word by its id.
66381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
672fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// it is possible to check for this with advantageous complexity. For each PtNode array, we search
6827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// for PtNodes with children and compare the children position with the position we look for.
69381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// When we shoot the position we look for, it means the word we look for is in the children
7027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
7127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// PtNode array with the last PtNode's children position still less than what we are searching for,
7227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// we must descend the last PtNode's children (for example, if the word we are searching for starts
7327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
742fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// than the position we look for, and we have to descend the z PtNode).
75381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi/* Parameters :
7694e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi * wordId: Id of the word we are searching for.
77381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
78381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * outUnigramProbability: a pointer to an int to write the probability into.
79381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * Return value : the code point count, of 0 if the word was not found.
80381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi */
81381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// TODO: Split this function to be more readable
821311cdcb6233abde792a9d9fdd294334c9be7043Keisuke Kuroynagiint PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
8394e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi        const int wordId, const int maxCodePointCount, int *const outCodePoints,
84c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        int *const outUnigramProbability) const {
8594e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi    const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
86381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    int pos = getRootPosition();
87381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    int wordPos = 0;
88fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto    const int *const codePointTable = mHeaderPolicy.getCodePointTable();
8965a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi    if (outUnigramProbability) {
9065a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi        *outUnigramProbability = NOT_A_PROBABILITY;
9165a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi    }
9227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
932fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    // only traverse PtNodes that are actually a part of the terminal we are searching, so each
942fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    // time we enter this loop we are one depth level further than last time.
952fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    // The only reason we count PtNodes is because we want to reduce the probability of infinite
96381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    // looping in case there is a bug. Since we know there is an upper bound to the depth we are
97381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    // supposed to traverse, it does not hurt to count iterations.
98381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
9927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi        int lastCandidatePtNodePos = 0;
10027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi        // Let's loop through PtNodes in this PtNode array searching for either the terminal
101381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        // or one of its ascendants.
102180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi        if (!isValidPos(pos)) {
103180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi            AKLOGE("PtNode array position is invalid. pos: %d, dict size: %zd",
104180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                    pos, mBuffer.size());
105be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi            mIsCorrupted = true;
106be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi            ASSERT(false);
107be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi            return 0;
108be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi        }
10927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi        for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
110180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                mBuffer.data(), &pos); ptNodeCount > 0; --ptNodeCount) {
111381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const int startPos = pos;
112180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi            if (!isValidPos(pos)) {
113180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                AKLOGE("PtNode position is invalid. pos: %d, dict size: %zd", pos, mBuffer.size());
114be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi                mIsCorrupted = true;
115be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi                ASSERT(false);
116be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi                return 0;
117be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi            }
118381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const PatriciaTrieReadingUtils::NodeFlags flags =
119180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mBuffer.data(), &pos);
120381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
121fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto                    mBuffer.data(), codePointTable, &pos);
12277ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi            if (ptNodePos == startPos) {
123381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // We found the position. Copy the rest of the code points in the buffer and return
124381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // the length.
125381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                outCodePoints[wordPos] = character;
126381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
127381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
128fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto                            mBuffer.data(), codePointTable, &pos);
129381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // We count code points in order to avoid infinite loops if the file is broken
130381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // or if there is some other bug
131381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    int charCount = maxCodePointCount;
132381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
133381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        outCodePoints[++wordPos] = nextChar;
134381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
135fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto                                mBuffer.data(), codePointTable, &pos);
136381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
137381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
13865a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi                if (outUnigramProbability) {
13965a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi                    *outUnigramProbability =
14065a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi                            PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
14165a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi                                    mBuffer.data(), &pos);
14265a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi                }
143381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                return ++wordPos;
144381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
14527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            // We need to skip past this PtNode, so skip any remaining code points after the
146381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // first and possibly the probability.
147381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
148180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                PatriciaTrieReadingUtils::skipCharacters(mBuffer.data(), flags, MAX_WORD_LENGTH,
149fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto                        codePointTable, &pos);
150381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
151381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (PatriciaTrieReadingUtils::isTerminal(flags)) {
152180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(), &pos);
153381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
15427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            // The fact that this PtNode has children is very important. Since we already know
15527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi            // that this PtNode does not match, if it has no children we know it is irrelevant
156381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // to what we are searching for.
157381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
158381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // We will write in `found' whether we have passed the children position we are
159381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // searching for. For example if we search for "beer", the children of b are less
160381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // than the address we are searching for and the children of c are greater. When we
161381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            // come here for c, we realize this is too big, and that we should descend b.
162381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            bool found;
163381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (hasChildren) {
164381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                int currentPos = pos;
165381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // Here comes the tricky part. First, read the children position.
166381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                const int childrenPos = PatriciaTrieReadingUtils
167180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                        ::readChildrenPositionAndAdvancePosition(mBuffer.data(), flags,
168180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                                &currentPos);
16977ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi                if (childrenPos > ptNodePos) {
170381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // If the children pos is greater than the position, it means the previous
17127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // PtNode, which position is stored in lastCandidatePtNodePos, was the right
172381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // one.
173381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    found = true;
17427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                } else if (1 >= ptNodeCount) {
17527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // However if we are on the LAST PtNode of this array, and we have NOT shot the
1762fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                    // position we should descend THIS PtNode. So we trick the
1772fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                    // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
1782fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                    // one.
17927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    lastCandidatePtNodePos = startPos;
180381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    found = true;
181381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                } else {
182381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // Else, we should continue looking.
183381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    found = false;
184381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
185381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            } else {
1862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                // Even if we don't have children here, we could still be on the last PtNode of
18727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // this array. If this is the case, we should descend the last PtNode that had
18827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // children, and their position is already in lastCandidatePtNodePos.
18927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                found = (1 >= ptNodeCount);
190381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
191381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi
192381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            if (found) {
19327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // Okay, we found the PtNode we should descend. Its position is in
19427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // the lastCandidatePtNodePos variable, so we just re-read it.
19527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                if (0 != lastCandidatePtNodePos) {
196381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    const PatriciaTrieReadingUtils::NodeFlags lastFlags =
197381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
198180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                                    mBuffer.data(), &lastCandidatePtNodePos);
199381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
200fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto                            mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
20127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // We copy all the characters in this PtNode to the buffer
202381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    outCodePoints[wordPos] = lastChar;
203381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
204381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
205fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto                                mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
206381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        int charCount = maxCodePointCount;
207381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        while (-1 != nextChar && --charCount > 0) {
208381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            outCodePoints[++wordPos] = nextChar;
209381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                            nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
210fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto                                    mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
211381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        }
212381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
213381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    ++wordPos;
214381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // Now we only need to branch to the children address. Skip the probability if
215381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // it's there, read pos, and break to resume the search at pos.
216381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
217180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(),
21827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                                &lastCandidatePtNodePos);
219381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
220381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
221180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                            mBuffer.data(), lastFlags, &lastCandidatePtNodePos);
222381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    break;
223381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                } else {
224381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // Here is a little tricky part: we come here if we found out that all children
22527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // addresses in this PtNode are bigger than the address we are searching for.
226381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    // Should we conclude the word is not in the dictionary? No! It could still be
22727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // one of the remaining PtNodes in this array, so we have to keep looking in
22827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // this array until we find it (or we realize it's not there either, in which
22927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // case it's actually not in the dictionary). Pass the end of this PtNode,
23027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                    // ready to start the next one.
231381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
232381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
233180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                                mBuffer.data(), flags, &pos);
234381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
235381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
236381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                        mShortcutListPolicy.skipAllShortcuts(&pos);
237381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
238381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
2393225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                        if (!mBigramListPolicy.skipAllBigrams(&pos)) {
240180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                            AKLOGE("Cannot skip bigrams. BufSize: %zd, pos: %d.", mBuffer.size(),
2413225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                                    pos);
2423225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                            mIsCorrupted = true;
2433225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                            ASSERT(false);
2443225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                            return 0;
2453225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                        }
246381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    }
247381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
248381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            } else {
249381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // If we did not find it, we should record the last children address for the next
250381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                // iteration.
25127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                if (hasChildren) lastCandidatePtNodePos = startPos;
25227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // Now skip the end of this PtNode (children pos and the attributes if any) so that
25327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi                // our pos is after the end of this PtNode, at the start of the next one.
254381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
255381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
256180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                            mBuffer.data(), flags, &pos);
257381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
258381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
259381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                    mShortcutListPolicy.skipAllShortcuts(&pos);
260381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
261381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
2623225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                    if (!mBigramListPolicy.skipAllBigrams(&pos)) {
263180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi                        AKLOGE("Cannot skip bigrams. BufSize: %zd, pos: %d.", mBuffer.size(), pos);
2643225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                        mIsCorrupted = true;
2653225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                        ASSERT(false);
2663225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                        return 0;
2673225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi                    }
268381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi                }
269381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi            }
270381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi
271381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi        }
272381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    }
27377ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    // If we have looked through all the PtNodes and found no match, the ptNodePos is
274381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    // not the position of a terminal in this dictionary.
275381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi    return 0;
276c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi}
277c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
2782fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// This function gets the position of the terminal PtNode of the exact matching word in the
27989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi// dictionary. If no match is found, it returns NOT_A_WORD_ID.
28089a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagiint PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
2816ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi        const bool forceLowerCaseSearch) const {
282be6117058840492c2862f8ae9f7dc95c29f3a8f3Keisuke Kuroyanagi    DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
283be6117058840492c2862f8ae9f7dc95c29f3a8f3Keisuke Kuroyanagi    readingHelper.initWithPtNodeArrayPos(getRootPosition());
2846ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi    const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(),
2856ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi            wordCodePoints.size(), forceLowerCaseSearch);
286b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi    if (readingHelper.isError()) {
287b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi        mIsCorrupted = true;
28889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi        AKLOGE("Dictionary reading error in getWordId().");
289b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi    }
29089a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    return getWordIdFromTerminalPtNodePos(ptNodePos);
291c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi}
292c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
293537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagiconst WordAttributes PatriciaTriePolicy::getWordAttributesInContext(
294537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi        const WordIdArrayView prevWordIds, const int wordId,
295537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi        MultiBigramMap *const multiBigramMap) const {
2969f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi    if (wordId == NOT_A_WORD_ID) {
2972111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi        return WordAttributes();
2989f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi    }
2999f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi    const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
3009f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi    const PtNodeParams ptNodeParams =
3019f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi            mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
3029f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi    if (multiBigramMap) {
3032111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi        const int probability =  multiBigramMap->getBigramProbability(this /* structurePolicy */,
3042111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi                prevWordIds, wordId, ptNodeParams.getProbability());
3052111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi        return getWordAttributes(probability, ptNodeParams);
3069f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi    }
307537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi    if (!prevWordIds.empty()) {
3089f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi        const int bigramProbability = getProbabilityOfWord(prevWordIds, wordId);
3099f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi        if (bigramProbability != NOT_A_PROBABILITY) {
3102111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi            return getWordAttributes(bigramProbability, ptNodeParams);
3119f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi        }
3129f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi    }
3132111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi    return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY),
3142111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi            ptNodeParams);
3152111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi}
3162111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi
3172111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagiconst WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability,
3182111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi        const PtNodeParams &ptNodeParams) const {
3197c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu    return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
3207c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu            ptNodeParams.isPossiblyOffensive());
3219f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi}
3229f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi
32365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagiint PatriciaTriePolicy::getProbability(const int unigramProbability,
32465d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        const int bigramProbability) const {
3258681bef03c1ca864d3de0ae27adb5cbfb63f0fefKeisuke Kuroyanagi    // Due to space constraints, the probability for bigrams is approximate - the lower the unigram
3268681bef03c1ca864d3de0ae27adb5cbfb63f0fefKeisuke Kuroyanagi    // probability, the worse the precision. The theoritical maximum error in resulting probability
3278681bef03c1ca864d3de0ae27adb5cbfb63f0fefKeisuke Kuroyanagi    // is 8 - although in the practice it's never bigger than 3 or 4 in very bad cases. This means
3288681bef03c1ca864d3de0ae27adb5cbfb63f0fefKeisuke Kuroyanagi    // that sometimes, we'll see some bigrams interverted here, but it can't get too bad.
32965d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    if (unigramProbability == NOT_A_PROBABILITY) {
33065d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        return NOT_A_PROBABILITY;
33165d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    } else if (bigramProbability == NOT_A_PROBABILITY) {
33265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        return ProbabilityUtils::backoff(unigramProbability);
33365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    } else {
33465d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
33565d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi                bigramProbability);
33665d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    }
33765d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi}
33865d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi
339537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagiint PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
340537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi        const int wordId) const {
34189a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    if (wordId == NOT_A_WORD_ID) {
342647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_PROBABILITY;
343647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
34489a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
3450fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi    const PtNodeParams ptNodeParams =
3460fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi            mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
3477c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu    if (ptNodeParams.isNotAWord()) {
3487c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu        // If this is not a word, it should behave as having no probability outside of the
3497c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu        // suggestion process (where it should be used for shortcuts).
350c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        return NOT_A_PROBABILITY;
3511229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi    }
352537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi    if (!prevWordIds.empty()) {
35389a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi        const int bigramsPosition = getBigramsPositionOfPtNode(
35489a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi                getTerminalPtNodePosFromWordId(prevWordIds[0]));
3550e6a1d1020c537d847ac4222cf621dea9db4311eKeisuke Kuroyanagi        BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition);
3561229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi        while (bigramsIt.hasNext()) {
3571229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi            bigramsIt.next();
3581229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi            if (bigramsIt.getBigramPos() == ptNodePos
3591229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi                    && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
3601229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi                return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
3611229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi            }
3621229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi        }
3631229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi        return NOT_A_PROBABILITY;
364c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi    }
3651e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
366c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi}
367c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
368537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagivoid PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds,
3692d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi        NgramListener *const listener) const {
370537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi    if (prevWordIds.empty()) {
371bd1f59bda5ad0b7028ec06c2de078f1623e76cddKeisuke Kuroyanagi        return;
372bd1f59bda5ad0b7028ec06c2de078f1623e76cddKeisuke Kuroyanagi    }
37389a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    const int bigramsPosition = getBigramsPositionOfPtNode(
37489a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi            getTerminalPtNodePosFromWordId(prevWordIds[0]));
3750e6a1d1020c537d847ac4222cf621dea9db4311eKeisuke Kuroyanagi    BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition);
3762d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi    while (bigramsIt.hasNext()) {
3772d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi        bigramsIt.next();
37889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi        listener->onVisitEntry(bigramsIt.getProbability(),
37989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi                getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()));
3802d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi    }
3812d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi}
3822d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi
383537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke KuroyanagiBinaryDictionaryShortcutIterator PatriciaTriePolicy::getShortcutIterator(const int wordId) const {
384ac983b13a9ee97d9a1a0ecd4a12dff5b2e2552e5Keisuke Kuroyanagi    const int shortcutPos = getShortcutPositionOfPtNode(getTerminalPtNodePosFromWordId(wordId));
385847a026cd81f52f7aee0d5fd75c14a3f8619ebafKeisuke Kuroyanagi    return BinaryDictionaryShortcutIterator(&mShortcutListPolicy, shortcutPos);
386847a026cd81f52f7aee0d5fd75c14a3f8619ebafKeisuke Kuroyanagi}
387847a026cd81f52f7aee0d5fd75c14a3f8619ebafKeisuke Kuroyanagi
38877ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
38977ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    if (ptNodePos == NOT_A_DICT_POS) {
390647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_DICT_POS;
391647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
3920fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi    return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
393c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi}
394c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi
39577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
39677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    if (ptNodePos == NOT_A_DICT_POS) {
397647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return NOT_A_DICT_POS;
398647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
3990fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi    return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getBigramsPos();
400c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi}
401c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi
402647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagiint PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
40377ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi        const int ptNodePos, DicNodeVector *childDicNodes) const {
4041e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    PatriciaTrieReadingUtils::NodeFlags flags;
4051e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    int mergedNodeCodePointCount = 0;
4061fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi    int mergedNodeCodePoints[MAX_WORD_LENGTH];
4071e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    int probability = NOT_A_PROBABILITY;
4081e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    int childrenPos = NOT_A_DICT_POS;
4091e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    int shortcutPos = NOT_A_DICT_POS;
4101e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    int bigramPos = NOT_A_DICT_POS;
4111e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    int siblingPos = NOT_A_DICT_POS;
412fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto    const int *const codePointTable = mHeaderPolicy.getCodePointTable();
413180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi    PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, &mShortcutListPolicy,
414fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto            &mBigramListPolicy, codePointTable, &flags, &mergedNodeCodePointCount,
415fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto            mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos,
416fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto            &siblingPos);
41779ba633402ceeebe216055cbd99a9e9701460f4aKeisuke Kuroyanagi    // Skip PtNodes don't start with Unicode code point because they represent non-word information.
41879ba633402ceeebe216055cbd99a9e9701460f4aKeisuke Kuroyanagi    if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
4197d475003572c9c2902f5918bad524f4ac233e629Keisuke Kuroyanagi        const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
420d53aea5af934f8bf7f0cd46ed578fe531dff96c5Keisuke Kuroyanagi        childDicNodes->pushLeavingChild(dicNode, childrenPos, wordId,
421521e2382da833f114477f4ef68db68a8d30b64d6Keisuke Kuroyanagi                CodePointArrayView(mergedNodeCodePoints, mergedNodeCodePointCount));
42279ba633402ceeebe216055cbd99a9e9701460f4aKeisuke Kuroyanagi    }
4231e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    return siblingPos;
4241fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi}
4251fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi
4266ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagiconst WordProperty PatriciaTriePolicy::getWordProperty(
4276ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi        const CodePointArrayView wordCodePoints) const {
42889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);
42989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    if (wordId == NOT_A_WORD_ID) {
430c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        AKLOGE("getWordProperty was called for invalid word.");
431c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        return WordProperty();
432c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    }
43389a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
4340fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi    const PtNodeParams ptNodeParams =
4350fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi            mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
436c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    // Fetch bigram information.
43779bb37d499ed6fcabe981153d5ff0b5b69509933Keisuke Kuroyanagi    std::vector<NgramProperty> ngrams;
438c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
439c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    int bigramWord1CodePoints[MAX_WORD_LENGTH];
440b00973952f269ebee6d1d5f808fad7ca64fb9954Keisuke Kuroyanagi    BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos);
441c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    while (bigramsIt.hasNext()) {
442c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        // Fetch the next bigram information and forward the iterator.
443c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        bigramsIt.next();
444c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        // Skip the entry if the entry has been deleted. This never happens for ver2 dicts.
445c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
446c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi            int word1Probability = NOT_A_PROBABILITY;
447ca42ec0f44707529fc067da36ba9d972641e4897Keisuke Kuroyanagi            const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
44894e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi                    getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH,
44994e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi                    bigramWord1CodePoints, &word1Probability);
450a34bdc395b5ce51a87ff3f550b1025fbe442098aKeisuke Kuroyanagi            const int probability = getProbability(word1Probability, bigramsIt.getProbability());
45179bb37d499ed6fcabe981153d5ff0b5b69509933Keisuke Kuroyanagi            ngrams.emplace_back(
45288bb28c132d87f15a52e9a0b8a45950f39eb19adKeisuke Kuroyanagi                    NgramContext(wordCodePoints.data(), wordCodePoints.size(),
45388bb28c132d87f15a52e9a0b8a45950f39eb19adKeisuke Kuroyanagi                            ptNodeParams.representsBeginningOfSentence()),
4542842e50c4b454f44cfb49a59b4ba2c13816a876dKeisuke Kuroyanagi                    CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
455287e155e44b4e937f2a62d010805702bc813c43bKeisuke Kuroyanagi                    probability, HistoricalInfo());
456c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        }
457c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    }
458c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    // Fetch shortcut information.
459e41b2ed8d31b84308f77a0bd14c5eecc5a17960aKeisuke Kuroyanagi    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
460c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
461c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    if (shortcutPos != NOT_A_DICT_POS) {
462c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        int shortcutTargetCodePoints[MAX_WORD_LENGTH];
46359ebd5171820785f4c7b893f48df8b9d65cc4902Keisuke Kuroyanagi        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mBuffer, &shortcutPos);
464c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        bool hasNext = true;
465c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        while (hasNext) {
466c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi            const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
46759ebd5171820785f4c7b893f48df8b9d65cc4902Keisuke Kuroyanagi                    ShortcutListReadingUtils::getFlagsAndForwardPointer(mBuffer, &shortcutPos);
468c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi            hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
469c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi            const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
47059ebd5171820785f4c7b893f48df8b9d65cc4902Keisuke Kuroyanagi                    mBuffer, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
471c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi            const int shortcutProbability =
472c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi                    ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
4732842e50c4b454f44cfb49a59b4ba2c13816a876dKeisuke Kuroyanagi            shortcuts.emplace_back(
4742842e50c4b454f44cfb49a59b4ba2c13816a876dKeisuke Kuroyanagi                    CodePointArrayView(shortcutTargetCodePoints, shortcutTargetLength).toVector(),
4752842e50c4b454f44cfb49a59b4ba2c13816a876dKeisuke Kuroyanagi                    shortcutProbability);
476c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi        }
477c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi    }
4782a015dcb25b2996ccca0d9fac74b334aa35928a3Keisuke Kuroyanagi    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
47905172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu            ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
48005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu            ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts));
481bbf0d4141bf4c5ee1026a8dfe45d46a416ad35b1Keisuke Kuroyanagi    return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams);
482c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi}
483c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi
484f7322b166b88f72b19509d8416700d4ec8ea7753Keisuke Kuroyanagiint PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
485f7322b166b88f72b19509d8416700d4ec8ea7753Keisuke Kuroyanagi        int *const outCodePointCount) {
486f7322b166b88f72b19509d8416700d4ec8ea7753Keisuke Kuroyanagi    *outCodePointCount = 0;
4870fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    if (token == 0) {
4880fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        // Start iterating the dictionary.
4890fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        mTerminalPtNodePositionsForIteratingWords.clear();
4900fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
4910fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi                &mTerminalPtNodePositionsForIteratingWords);
4920fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
4930fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        readingHelper.initWithPtNodeArrayPos(getRootPosition());
4940fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
4950fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    }
4960fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    const int terminalPtNodePositionsVectorSize =
4970fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi            static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
4980fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
4990fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        AKLOGE("Given token %d is invalid.", token);
5000fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        return 0;
5010fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    }
5020fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
50365a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi    *outCodePointCount = getCodePointsAndReturnCodePointCount(
50465a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi            getWordIdFromTerminalPtNodePos(terminalPtNodePos), MAX_WORD_LENGTH, outCodePoints);
5050fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    const int nextToken = token + 1;
5060fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    if (nextToken >= terminalPtNodePositionsVectorSize) {
5070fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        // All words have been iterated.
5080fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        mTerminalPtNodePositionsForIteratingWords.clear();
5090fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        return 0;
5100fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    }
5110fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi    return nextToken;
5120fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi}
5130fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi
51489a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagiint PatriciaTriePolicy::getWordIdFromTerminalPtNodePos(const int ptNodePos) const {
51589a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    return ptNodePos == NOT_A_DICT_POS ? NOT_A_WORD_ID : ptNodePos;
51689a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi}
51789a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi
51889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagiint PatriciaTriePolicy::getTerminalPtNodePosFromWordId(const int wordId) const {
51989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    return wordId == NOT_A_WORD_ID ? NOT_A_DICT_POS : wordId;
52089a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi}
521180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi
522180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagibool PatriciaTriePolicy::isValidPos(const int pos) const {
523180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi    return pos >= 0 && pos < static_cast<int>(mBuffer.size());
524180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi}
525180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi
526c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} // namespace latinime
527