1c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi/* 2c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Copyright (C) 2013, The Android Open Source Project 3c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * 4c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Licensed under the Apache License, Version 2.0 (the "License"); 5c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * you may not use this file except in compliance with the License. 6c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * You may obtain a copy of the License at 7c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * 8c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * http://www.apache.org/licenses/LICENSE-2.0 9c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * 10c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Unless required by applicable law or agreed to in writing, software 11c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * distributed under the License is distributed on an "AS IS" BASIS, 12c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * See the License for the specific language governing permissions and 14c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * limitations under the License. 15c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi */ 16c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 1788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v2/patricia_trie_policy.h" 18c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 19c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "defines.h" 20c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node.h" 21c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node_vector.h" 2288bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/ngram_listener.h" 2388bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/ngram_context.h" 2488bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/pt_common/dynamic_pt_reading_helper.h" 2588bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/pt_common/patricia_trie_reading_utils.h" 2688bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/binary_dictionary_bigrams_iterator.h" 2788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/multi_bigram_map.h" 2888bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/probability_utils.h" 2979ba633402ceeebe216055cbd99a9e9701460f4aKeisuke Kuroyanagi#include "utils/char_utils.h" 30c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 31c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynaginamespace latinime { 32c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 332fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasavoid PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, 347fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi DicNodeVector *const childDicNodes) const { 351fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi if (!dicNode->hasChildren()) { 361fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi return; 371fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi } 382fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa int nextPos = dicNode->getChildrenPtNodeArrayPos(); 39180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi if (!isValidPos(nextPos)) { 40180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %zd", 41180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi nextPos, mBuffer.size()); 42b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi mIsCorrupted = true; 43009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi ASSERT(false); 44009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi return; 45009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi } 4627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( 47180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi mBuffer.data(), &nextPos); 481fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi for (int i = 0; i < childCount; i++) { 49180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi if (!isValidPos(nextPos)) { 50180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %zd, childCount: %d / %d", 51180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi nextPos, mBuffer.size(), i, childCount); 52b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi mIsCorrupted = true; 53009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi ASSERT(false); 54009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi return; 55009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi } 567fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); 571fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi } 58c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} 59c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 6065a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagiint PatriciaTriePolicy::getCodePointsAndReturnCodePointCount(const int wordId, 6165a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi const int maxCodePointCount, int *const outCodePoints) const { 6265a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi return getCodePointsAndProbabilityAndReturnCodePointCount(wordId, maxCodePointCount, 6365a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi outCodePoints, nullptr /* outUnigramProbability */); 6465a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi} 6594e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi// This retrieves code points and the probability of the word by its id. 66381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// Due to the fact that words are ordered in the dictionary in a strict breadth-first order, 672fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// it is possible to check for this with advantageous complexity. For each PtNode array, we search 6827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// for PtNodes with children and compare the children position with the position we look for. 69381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// When we shoot the position we look for, it means the word we look for is in the children 7027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a 7127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// PtNode array with the last PtNode's children position still less than what we are searching for, 7227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// we must descend the last PtNode's children (for example, if the word we are searching for starts 7327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi// with a z, it's the last PtNode of the root array, so all children addresses will be smaller 742fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// than the position we look for, and we have to descend the z PtNode). 75381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi/* Parameters : 7694e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi * wordId: Id of the word we are searching for. 77381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size. 78381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * outUnigramProbability: a pointer to an int to write the probability into. 79381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi * Return value : the code point count, of 0 if the word was not found. 80381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi */ 81381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi// TODO: Split this function to be more readable 821311cdcb6233abde792a9d9fdd294334c9be7043Keisuke Kuroynagiint PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( 8394e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi const int wordId, const int maxCodePointCount, int *const outCodePoints, 84c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi int *const outUnigramProbability) const { 8594e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); 86381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int pos = getRootPosition(); 87381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int wordPos = 0; 88fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto const int *const codePointTable = mHeaderPolicy.getCodePointTable(); 8965a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi if (outUnigramProbability) { 9065a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi *outUnigramProbability = NOT_A_PROBABILITY; 9165a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi } 9227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will 932fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // only traverse PtNodes that are actually a part of the terminal we are searching, so each 942fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // time we enter this loop we are one depth level further than last time. 952fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // The only reason we count PtNodes is because we want to reduce the probability of infinite 96381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // looping in case there is a bug. Since we know there is an upper bound to the depth we are 97381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // supposed to traverse, it does not hurt to count iterations. 98381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { 9927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi int lastCandidatePtNodePos = 0; 10027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // Let's loop through PtNodes in this PtNode array searching for either the terminal 101381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // or one of its ascendants. 102180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi if (!isValidPos(pos)) { 103180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi AKLOGE("PtNode array position is invalid. pos: %d, dict size: %zd", 104180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi pos, mBuffer.size()); 105be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi mIsCorrupted = true; 106be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi ASSERT(false); 107be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi return 0; 108be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi } 10927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( 110180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi mBuffer.data(), &pos); ptNodeCount > 0; --ptNodeCount) { 111381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int startPos = pos; 112180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi if (!isValidPos(pos)) { 113180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi AKLOGE("PtNode position is invalid. pos: %d, dict size: %zd", pos, mBuffer.size()); 114be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi mIsCorrupted = true; 115be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi ASSERT(false); 116be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi return 0; 117be81b75decd188bd12ef3945c4aacb5dd9fff72fKeisuke Kuroyanagi } 118381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const PatriciaTrieReadingUtils::NodeFlags flags = 119180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mBuffer.data(), &pos); 120381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 121fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto mBuffer.data(), codePointTable, &pos); 12277ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == startPos) { 123381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // We found the position. Copy the rest of the code points in the buffer and return 124381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // the length. 125381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi outCodePoints[wordPos] = character; 126381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { 127381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 128fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto mBuffer.data(), codePointTable, &pos); 129381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // We count code points in order to avoid infinite loops if the file is broken 130381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // or if there is some other bug 131381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int charCount = maxCodePointCount; 132381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi while (NOT_A_CODE_POINT != nextChar && --charCount > 0) { 133381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi outCodePoints[++wordPos] = nextChar; 134381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 135fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto mBuffer.data(), codePointTable, &pos); 136381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 137381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 13865a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi if (outUnigramProbability) { 13965a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi *outUnigramProbability = 14065a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition( 14165a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi mBuffer.data(), &pos); 14265a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi } 143381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi return ++wordPos; 144381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 14527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // We need to skip past this PtNode, so skip any remaining code points after the 146381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // first and possibly the probability. 147381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { 148180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi PatriciaTrieReadingUtils::skipCharacters(mBuffer.data(), flags, MAX_WORD_LENGTH, 149fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto codePointTable, &pos); 150381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 151381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::isTerminal(flags)) { 152180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(), &pos); 153381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 15427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // The fact that this PtNode has children is very important. Since we already know 15527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // that this PtNode does not match, if it has no children we know it is irrelevant 156381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // to what we are searching for. 157381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags); 158381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // We will write in `found' whether we have passed the children position we are 159381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // searching for. For example if we search for "beer", the children of b are less 160381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // than the address we are searching for and the children of c are greater. When we 161381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // come here for c, we realize this is too big, and that we should descend b. 162381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi bool found; 163381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (hasChildren) { 164381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int currentPos = pos; 165381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Here comes the tricky part. First, read the children position. 166381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int childrenPos = PatriciaTrieReadingUtils 167180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi ::readChildrenPositionAndAdvancePosition(mBuffer.data(), flags, 168180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi ¤tPos); 16977ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (childrenPos > ptNodePos) { 170381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // If the children pos is greater than the position, it means the previous 17127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // PtNode, which position is stored in lastCandidatePtNodePos, was the right 172381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // one. 173381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi found = true; 17427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi } else if (1 >= ptNodeCount) { 17527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // However if we are on the LAST PtNode of this array, and we have NOT shot the 1762fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // position we should descend THIS PtNode. So we trick the 1772fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // lastCandidatePtNodePos so that we will descend this PtNode, not the previous 1782fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // one. 17927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi lastCandidatePtNodePos = startPos; 180381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi found = true; 181381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 182381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Else, we should continue looking. 183381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi found = false; 184381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 185381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 1862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // Even if we don't have children here, we could still be on the last PtNode of 18727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // this array. If this is the case, we should descend the last PtNode that had 18827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // children, and their position is already in lastCandidatePtNodePos. 18927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi found = (1 >= ptNodeCount); 190381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 191381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi 192381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (found) { 19327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // Okay, we found the PtNode we should descend. Its position is in 19427b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // the lastCandidatePtNodePos variable, so we just re-read it. 19527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi if (0 != lastCandidatePtNodePos) { 196381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const PatriciaTrieReadingUtils::NodeFlags lastFlags = 197381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::getFlagsAndAdvancePosition( 198180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi mBuffer.data(), &lastCandidatePtNodePos); 199381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 200fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto mBuffer.data(), codePointTable, &lastCandidatePtNodePos); 20127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // We copy all the characters in this PtNode to the buffer 202381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi outCodePoints[wordPos] = lastChar; 203381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) { 204381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 205fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto mBuffer.data(), codePointTable, &lastCandidatePtNodePos); 206381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi int charCount = maxCodePointCount; 207381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi while (-1 != nextChar && --charCount > 0) { 208381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi outCodePoints[++wordPos] = nextChar; 209381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 210fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto mBuffer.data(), codePointTable, &lastCandidatePtNodePos); 211381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 212381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 213381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi ++wordPos; 214381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Now we only need to branch to the children address. Skip the probability if 215381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // it's there, read pos, and break to resume the search at pos. 216381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) { 217180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(), 21827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi &lastCandidatePtNodePos); 219381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 220381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 221180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi mBuffer.data(), lastFlags, &lastCandidatePtNodePos); 222381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi break; 223381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 224381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Here is a little tricky part: we come here if we found out that all children 22527b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // addresses in this PtNode are bigger than the address we are searching for. 226381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // Should we conclude the word is not in the dictionary? No! It could still be 22727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // one of the remaining PtNodes in this array, so we have to keep looking in 22827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // this array until we find it (or we realize it's not there either, in which 22927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // case it's actually not in the dictionary). Pass the end of this PtNode, 23027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // ready to start the next one. 231381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 232381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 233180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi mBuffer.data(), flags, &pos); 234381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 235381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 236381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mShortcutListPolicy.skipAllShortcuts(&pos); 237381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 238381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasBigrams(flags)) { 2393225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi if (!mBigramListPolicy.skipAllBigrams(&pos)) { 240180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi AKLOGE("Cannot skip bigrams. BufSize: %zd, pos: %d.", mBuffer.size(), 2413225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi pos); 2423225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi mIsCorrupted = true; 2433225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi ASSERT(false); 2443225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi return 0; 2453225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi } 246381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 247381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 248381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } else { 249381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // If we did not find it, we should record the last children address for the next 250381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // iteration. 25127b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi if (hasChildren) lastCandidatePtNodePos = startPos; 25227b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // Now skip the end of this PtNode (children pos and the attributes if any) so that 25327b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi // our pos is after the end of this PtNode, at the start of the next one. 254381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 255381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 256180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi mBuffer.data(), flags, &pos); 257381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 258381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 259381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi mShortcutListPolicy.skipAllShortcuts(&pos); 260381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 261381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi if (PatriciaTrieReadingUtils::hasBigrams(flags)) { 2623225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi if (!mBigramListPolicy.skipAllBigrams(&pos)) { 263180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi AKLOGE("Cannot skip bigrams. BufSize: %zd, pos: %d.", mBuffer.size(), pos); 2643225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi mIsCorrupted = true; 2653225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi ASSERT(false); 2663225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi return 0; 2673225b6fe66a84ed7f499daf84d085141a66bb346Keisuke Kuroyanagi } 268381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 269381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 270381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi 271381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 272381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi } 27377ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi // If we have looked through all the PtNodes and found no match, the ptNodePos is 274381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi // not the position of a terminal in this dictionary. 275381c12df20f0113c141c3590e21fdae405f57853Keisuke Kuroyanagi return 0; 276c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} 277c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 2782fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// This function gets the position of the terminal PtNode of the exact matching word in the 27989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi// dictionary. If no match is found, it returns NOT_A_WORD_ID. 28089a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagiint PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, 2816ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi const bool forceLowerCaseSearch) const { 282be6117058840492c2862f8ae9f7dc95c29f3a8f3Keisuke Kuroyanagi DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); 283be6117058840492c2862f8ae9f7dc95c29f3a8f3Keisuke Kuroyanagi readingHelper.initWithPtNodeArrayPos(getRootPosition()); 2846ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(), 2856ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi wordCodePoints.size(), forceLowerCaseSearch); 286b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi if (readingHelper.isError()) { 287b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi mIsCorrupted = true; 28889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi AKLOGE("Dictionary reading error in getWordId()."); 289b96012acef7c7add578b95efa17f3b6132220fd6Keisuke Kuroyanagi } 29089a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi return getWordIdFromTerminalPtNodePos(ptNodePos); 291c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} 292c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 293537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagiconst WordAttributes PatriciaTriePolicy::getWordAttributesInContext( 294537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi const WordIdArrayView prevWordIds, const int wordId, 295537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi MultiBigramMap *const multiBigramMap) const { 2969f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi if (wordId == NOT_A_WORD_ID) { 2972111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi return WordAttributes(); 2989f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi } 2999f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); 3009f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi const PtNodeParams ptNodeParams = 3019f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 3029f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi if (multiBigramMap) { 3032111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */, 3042111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi prevWordIds, wordId, ptNodeParams.getProbability()); 3052111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi return getWordAttributes(probability, ptNodeParams); 3069f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi } 307537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi if (!prevWordIds.empty()) { 3089f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi const int bigramProbability = getProbabilityOfWord(prevWordIds, wordId); 3099f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi if (bigramProbability != NOT_A_PROBABILITY) { 3102111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi return getWordAttributes(bigramProbability, ptNodeParams); 3119f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi } 3129f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi } 3132111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY), 3142111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi ptNodeParams); 3152111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi} 3162111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi 3172111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagiconst WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability, 3182111e3abc9c9c0ea0350b8470532bf636b78cdd7Keisuke Kuroyanagi const PtNodeParams &ptNodeParams) const { 3197c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(), 3207c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu ptNodeParams.isPossiblyOffensive()); 3219f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi} 3229f8da0f833c7aab226ed0b93ab6c546380b068bbKeisuke Kuroyanagi 32365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagiint PatriciaTriePolicy::getProbability(const int unigramProbability, 32465d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi const int bigramProbability) const { 3258681bef03c1ca864d3de0ae27adb5cbfb63f0fefKeisuke Kuroyanagi // Due to space constraints, the probability for bigrams is approximate - the lower the unigram 3268681bef03c1ca864d3de0ae27adb5cbfb63f0fefKeisuke Kuroyanagi // probability, the worse the precision. The theoritical maximum error in resulting probability 3278681bef03c1ca864d3de0ae27adb5cbfb63f0fefKeisuke Kuroyanagi // is 8 - although in the practice it's never bigger than 3 or 4 in very bad cases. This means 3288681bef03c1ca864d3de0ae27adb5cbfb63f0fefKeisuke Kuroyanagi // that sometimes, we'll see some bigrams interverted here, but it can't get too bad. 32965d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi if (unigramProbability == NOT_A_PROBABILITY) { 33065d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return NOT_A_PROBABILITY; 33165d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi } else if (bigramProbability == NOT_A_PROBABILITY) { 33265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return ProbabilityUtils::backoff(unigramProbability); 33365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi } else { 33465d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, 33565d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi bigramProbability); 33665d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi } 33765d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi} 33865d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi 339537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagiint PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds, 340537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi const int wordId) const { 34189a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi if (wordId == NOT_A_WORD_ID) { 342647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_PROBABILITY; 343647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 34489a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); 3450fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi const PtNodeParams ptNodeParams = 3460fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 3477c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu if (ptNodeParams.isNotAWord()) { 3487c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu // If this is not a word, it should behave as having no probability outside of the 3497c87859d4c16c9cf19b095b865d7000ebc3cdaa9Adrian Velicu // suggestion process (where it should be used for shortcuts). 350c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi return NOT_A_PROBABILITY; 3511229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi } 352537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi if (!prevWordIds.empty()) { 35389a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi const int bigramsPosition = getBigramsPositionOfPtNode( 35489a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi getTerminalPtNodePosFromWordId(prevWordIds[0])); 3550e6a1d1020c537d847ac4222cf621dea9db4311eKeisuke Kuroyanagi BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition); 3561229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi while (bigramsIt.hasNext()) { 3571229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi bigramsIt.next(); 3581229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi if (bigramsIt.getBigramPos() == ptNodePos 3591229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi && bigramsIt.getProbability() != NOT_A_PROBABILITY) { 3601229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); 3611229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi } 3621229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi } 3631229879e7c5892e818ab53b3c2162a158cc5e177Keisuke Kuroyanagi return NOT_A_PROBABILITY; 364c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi } 3651e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); 366c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} 367c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi 368537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagivoid PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds, 3692d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi NgramListener *const listener) const { 370537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi if (prevWordIds.empty()) { 371bd1f59bda5ad0b7028ec06c2de078f1623e76cddKeisuke Kuroyanagi return; 372bd1f59bda5ad0b7028ec06c2de078f1623e76cddKeisuke Kuroyanagi } 37389a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi const int bigramsPosition = getBigramsPositionOfPtNode( 37489a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi getTerminalPtNodePosFromWordId(prevWordIds[0])); 3750e6a1d1020c537d847ac4222cf621dea9db4311eKeisuke Kuroyanagi BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition); 3762d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi while (bigramsIt.hasNext()) { 3772d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi bigramsIt.next(); 37889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi listener->onVisitEntry(bigramsIt.getProbability(), 37989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos())); 3802d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi } 3812d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi} 3822d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi 383537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke KuroyanagiBinaryDictionaryShortcutIterator PatriciaTriePolicy::getShortcutIterator(const int wordId) const { 384ac983b13a9ee97d9a1a0ecd4a12dff5b2e2552e5Keisuke Kuroyanagi const int shortcutPos = getShortcutPositionOfPtNode(getTerminalPtNodePosFromWordId(wordId)); 385847a026cd81f52f7aee0d5fd75c14a3f8619ebafKeisuke Kuroyanagi return BinaryDictionaryShortcutIterator(&mShortcutListPolicy, shortcutPos); 386847a026cd81f52f7aee0d5fd75c14a3f8619ebafKeisuke Kuroyanagi} 387847a026cd81f52f7aee0d5fd75c14a3f8619ebafKeisuke Kuroyanagi 38877ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { 38977ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == NOT_A_DICT_POS) { 390647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_DICT_POS; 391647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 3920fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos(); 393c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi} 394c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi 39577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { 39677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == NOT_A_DICT_POS) { 397647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return NOT_A_DICT_POS; 398647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 3990fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getBigramsPos(); 400c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi} 401c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi 402647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagiint PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, 40377ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi const int ptNodePos, DicNodeVector *childDicNodes) const { 4041e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi PatriciaTrieReadingUtils::NodeFlags flags; 4051e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi int mergedNodeCodePointCount = 0; 4061fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi int mergedNodeCodePoints[MAX_WORD_LENGTH]; 4071e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi int probability = NOT_A_PROBABILITY; 4081e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi int childrenPos = NOT_A_DICT_POS; 4091e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi int shortcutPos = NOT_A_DICT_POS; 4101e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi int bigramPos = NOT_A_DICT_POS; 4111e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi int siblingPos = NOT_A_DICT_POS; 412fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto const int *const codePointTable = mHeaderPolicy.getCodePointTable(); 413180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, &mShortcutListPolicy, 414fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto &mBigramListPolicy, codePointTable, &flags, &mergedNodeCodePointCount, 415fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos, 416fb2bde5a688d93aa946e3dd923aa1e99588777fcAkifumi Yoshimoto &siblingPos); 41779ba633402ceeebe216055cbd99a9e9701460f4aKeisuke Kuroyanagi // Skip PtNodes don't start with Unicode code point because they represent non-word information. 41879ba633402ceeebe216055cbd99a9e9701460f4aKeisuke Kuroyanagi if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { 4197d475003572c9c2902f5918bad524f4ac233e629Keisuke Kuroyanagi const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID; 420d53aea5af934f8bf7f0cd46ed578fe531dff96c5Keisuke Kuroyanagi childDicNodes->pushLeavingChild(dicNode, childrenPos, wordId, 421521e2382da833f114477f4ef68db68a8d30b64d6Keisuke Kuroyanagi CodePointArrayView(mergedNodeCodePoints, mergedNodeCodePointCount)); 42279ba633402ceeebe216055cbd99a9e9701460f4aKeisuke Kuroyanagi } 4231e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi return siblingPos; 4241fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi} 4251fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi 4266ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagiconst WordProperty PatriciaTriePolicy::getWordProperty( 4276ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi const CodePointArrayView wordCodePoints) const { 42889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */); 42989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi if (wordId == NOT_A_WORD_ID) { 430c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi AKLOGE("getWordProperty was called for invalid word."); 431c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi return WordProperty(); 432c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi } 43389a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); 4340fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi const PtNodeParams ptNodeParams = 4350fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 436c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi // Fetch bigram information. 43779bb37d499ed6fcabe981153d5ff0b5b69509933Keisuke Kuroyanagi std::vector<NgramProperty> ngrams; 438c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); 439c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi int bigramWord1CodePoints[MAX_WORD_LENGTH]; 440b00973952f269ebee6d1d5f808fad7ca64fb9954Keisuke Kuroyanagi BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos); 441c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi while (bigramsIt.hasNext()) { 442c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi // Fetch the next bigram information and forward the iterator. 443c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi bigramsIt.next(); 444c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi // Skip the entry if the entry has been deleted. This never happens for ver2 dicts. 445c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) { 446c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi int word1Probability = NOT_A_PROBABILITY; 447ca42ec0f44707529fc067da36ba9d972641e4897Keisuke Kuroyanagi const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( 44894e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH, 44994e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi bigramWord1CodePoints, &word1Probability); 450a34bdc395b5ce51a87ff3f550b1025fbe442098aKeisuke Kuroyanagi const int probability = getProbability(word1Probability, bigramsIt.getProbability()); 45179bb37d499ed6fcabe981153d5ff0b5b69509933Keisuke Kuroyanagi ngrams.emplace_back( 45288bb28c132d87f15a52e9a0b8a45950f39eb19adKeisuke Kuroyanagi NgramContext(wordCodePoints.data(), wordCodePoints.size(), 45388bb28c132d87f15a52e9a0b8a45950f39eb19adKeisuke Kuroyanagi ptNodeParams.representsBeginningOfSentence()), 4542842e50c4b454f44cfb49a59b4ba2c13816a876dKeisuke Kuroyanagi CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(), 455287e155e44b4e937f2a62d010805702bc813c43bKeisuke Kuroyanagi probability, HistoricalInfo()); 456c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi } 457c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi } 458c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi // Fetch shortcut information. 459e41b2ed8d31b84308f77a0bd14c5eecc5a17960aKeisuke Kuroyanagi std::vector<UnigramProperty::ShortcutProperty> shortcuts; 460c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); 461c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi if (shortcutPos != NOT_A_DICT_POS) { 462c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi int shortcutTargetCodePoints[MAX_WORD_LENGTH]; 46359ebd5171820785f4c7b893f48df8b9d65cc4902Keisuke Kuroyanagi ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mBuffer, &shortcutPos); 464c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi bool hasNext = true; 465c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi while (hasNext) { 466c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi const ShortcutListReadingUtils::ShortcutFlags shortcutFlags = 46759ebd5171820785f4c7b893f48df8b9d65cc4902Keisuke Kuroyanagi ShortcutListReadingUtils::getFlagsAndForwardPointer(mBuffer, &shortcutPos); 468c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags); 469c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget( 47059ebd5171820785f4c7b893f48df8b9d65cc4902Keisuke Kuroyanagi mBuffer, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos); 471c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi const int shortcutProbability = 472c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags); 4732842e50c4b454f44cfb49a59b4ba2c13816a876dKeisuke Kuroyanagi shortcuts.emplace_back( 4742842e50c4b454f44cfb49a59b4ba2c13816a876dKeisuke Kuroyanagi CodePointArrayView(shortcutTargetCodePoints, shortcutTargetLength).toVector(), 4752842e50c4b454f44cfb49a59b4ba2c13816a876dKeisuke Kuroyanagi shortcutProbability); 476c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi } 477c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi } 4782a015dcb25b2996ccca0d9fac74b334aa35928a3Keisuke Kuroyanagi const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), 47905172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(), 48005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts)); 481bbf0d4141bf4c5ee1026a8dfe45d46a416ad35b1Keisuke Kuroyanagi return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); 482c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi} 483c63d183473390dbe6ddef37df48b36ae49de3f29Keisuke Kuroyanagi 484f7322b166b88f72b19509d8416700d4ec8ea7753Keisuke Kuroyanagiint PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, 485f7322b166b88f72b19509d8416700d4ec8ea7753Keisuke Kuroyanagi int *const outCodePointCount) { 486f7322b166b88f72b19509d8416700d4ec8ea7753Keisuke Kuroyanagi *outCodePointCount = 0; 4870fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi if (token == 0) { 4880fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi // Start iterating the dictionary. 4890fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi mTerminalPtNodePositionsForIteratingWords.clear(); 4900fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( 4910fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi &mTerminalPtNodePositionsForIteratingWords); 4920fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); 4930fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi readingHelper.initWithPtNodeArrayPos(getRootPosition()); 4940fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); 4950fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi } 4960fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi const int terminalPtNodePositionsVectorSize = 4970fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); 4980fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi if (token < 0 || token >= terminalPtNodePositionsVectorSize) { 4990fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi AKLOGE("Given token %d is invalid.", token); 5000fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi return 0; 5010fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi } 5020fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; 50365a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi *outCodePointCount = getCodePointsAndReturnCodePointCount( 50465a7ccfa003f898064f09c059eb236cf8f82cd89Keisuke Kuroyanagi getWordIdFromTerminalPtNodePos(terminalPtNodePos), MAX_WORD_LENGTH, outCodePoints); 5050fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi const int nextToken = token + 1; 5060fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi if (nextToken >= terminalPtNodePositionsVectorSize) { 5070fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi // All words have been iterated. 5080fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi mTerminalPtNodePositionsForIteratingWords.clear(); 5090fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi return 0; 5100fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi } 5110fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi return nextToken; 5120fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi} 5130fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi 51489a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagiint PatriciaTriePolicy::getWordIdFromTerminalPtNodePos(const int ptNodePos) const { 51589a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi return ptNodePos == NOT_A_DICT_POS ? NOT_A_WORD_ID : ptNodePos; 51689a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi} 51789a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi 51889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagiint PatriciaTriePolicy::getTerminalPtNodePosFromWordId(const int wordId) const { 51989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi return wordId == NOT_A_WORD_ID ? NOT_A_DICT_POS : wordId; 52089a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi} 521180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi 522180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagibool PatriciaTriePolicy::isValidPos(const int pos) const { 523180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi return pos >= 0 && pos < static_cast<int>(mBuffer.size()); 524180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi} 525180e7b4c07f8c17862b9bdcdee0bc4b4413328a9Keisuke Kuroyanagi 526c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} // namespace latinime 527