12fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa/* 22fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * Copyright (C) 2013, The Android Open Source Project 32fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * 42fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * Licensed under the Apache License, Version 2.0 (the "License"); 52fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * you may not use this file except in compliance with the License. 62fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * You may obtain a copy of the License at 72fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * 82fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * http://www.apache.org/licenses/LICENSE-2.0 92fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * 102fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * Unless required by applicable law or agreed to in writing, software 112fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * distributed under the License is distributed on an "AS IS" BASIS, 122fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * See the License for the specific language governing permissions and 142fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * limitations under the License. 152fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa */ 162fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 172fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa#ifndef LATINIME_DYNAMIC_PT_UPDATING_HELPER_H 182fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa#define LATINIME_DYNAMIC_PT_UPDATING_HELPER_H 192fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 202fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa#include "defines.h" 2188bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/pt_common/pt_node_params.h" 229069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi#include "utils/int_array_view.h" 232fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 242fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasanamespace latinime { 252fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 2679bb37d499ed6fcabe981153d5ff0b5b69509933Keisuke Kuroyanagiclass NgramProperty; 272fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass BufferWithExtendableBuffer; 282fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass DynamicPtReadingHelper; 292fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass PtNodeReader; 302fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass PtNodeWriter; 31b636e25e951e48e071d5348756413d6fc065632dKeisuke Kuroyanagiclass UnigramProperty; 322fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 332fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass DynamicPtUpdatingHelper { 342fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa public: 352fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa DynamicPtUpdatingHelper(BufferWithExtendableBuffer *const buffer, 362fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter) 372fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {} 382fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 392fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa ~DynamicPtUpdatingHelper() {} 402fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 412fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // Add a word to the dictionary. If the word already exists, update the probability. 422fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa bool addUnigramWord(DynamicPtReadingHelper *const readingHelper, 4389a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty, 4489a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi bool *const outAddedNewUnigram); 452fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 460c0b8207cdabc1f5c1a81441c1ab0cb715e458eaKeisuke Kuroyanagi // TODO: Remove after stopping supporting v402. 479069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi // Add an n-gram entry. 489069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos, 4979bb37d499ed6fcabe981153d5ff0b5b69509933Keisuke Kuroyanagi const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); 502fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 510c0b8207cdabc1f5c1a81441c1ab0cb715e458eaKeisuke Kuroyanagi // TODO: Remove after stopping supporting v402. 529069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi // Remove an n-gram entry. 539069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos); 542fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 552fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // Add a shortcut target. 5689a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi bool addShortcutTarget(const int wordPos, const CodePointArrayView targetCodePoints, 5789a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const int shortcutProbability); 582fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 592fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa private: 602fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper); 612fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 622fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa static const int CHILDREN_POSITION_FIELD_SIZE; 632fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 642fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa BufferWithExtendableBuffer *const mBuffer; 652fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa const PtNodeReader *const mPtNodeReader; 662fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa PtNodeWriter *const mPtNodeWriter; 672fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 6889a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi bool createAndInsertNodeIntoPtNodeArray(const int parentPos, 6989a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const CodePointArrayView ptNodeCodePoints, const UnigramProperty *const unigramProperty, 70b636e25e951e48e071d5348756413d6fc065632dKeisuke Kuroyanagi int *const forwardLinkFieldPos); 712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 72b636e25e951e48e071d5348756413d6fc065632dKeisuke Kuroyanagi bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, 73b636e25e951e48e071d5348756413d6fc065632dKeisuke Kuroyanagi const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); 742fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 752fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, 7689a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const UnigramProperty *const unigramProperty, 7789a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const CodePointArrayView remainingCodePoints); 782fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 7989a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, 8089a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const CodePointArrayView ptNodeCodePoints, 8189a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const UnigramProperty *const unigramProperty); 822fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 8389a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi bool reallocatePtNodeAndAddNewPtNodes(const PtNodeParams *const reallocatingPtNodeParams, 8489a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const size_t overlappingCodePointCount, const UnigramProperty *const unigramProperty, 8589a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const CodePointArrayView newPtNodeCodePoints); 862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 872fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, 8805172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu const bool isNotAWord, const bool isPossiblyOffensive, const bool isTerminal, 8989a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi const int parentPos, const CodePointArrayView codePoints, const int probability) const; 902fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 9105172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, 9205172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu const bool isPossiblyOffensive, const bool isTerminal, const int parentPos, 9305172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu const CodePointArrayView codePoints, const int probability) const; 942fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa}; 952fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa} // namespace latinime 962fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */ 97