/*
 * Copyright (C) 2013, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
#define LATINIME_DYNAMIC_PT_UPDATING_HELPER_H

#include <cstddef>

#include "defines.h"
#include "dictionary/structure/pt_common/pt_node_params.h"
#include "utils/int_array_view.h"

242fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasanamespace latinime {
252fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
2679bb37d499ed6fcabe981153d5ff0b5b69509933Keisuke Kuroyanagiclass NgramProperty;
272fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass BufferWithExtendableBuffer;
282fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass DynamicPtReadingHelper;
292fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass PtNodeReader;
302fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass PtNodeWriter;
31b636e25e951e48e071d5348756413d6fc065632dKeisuke Kuroyanagiclass UnigramProperty;
322fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
332fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaclass DynamicPtUpdatingHelper {
342fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa public:
352fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    DynamicPtUpdatingHelper(BufferWithExtendableBuffer *const buffer,
362fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter)
372fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {}
382fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
392fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    ~DynamicPtUpdatingHelper() {}
402fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
412fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    // Add a word to the dictionary. If the word already exists, update the probability.
422fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
4389a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty,
4489a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            bool *const outAddedNewUnigram);
452fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
460c0b8207cdabc1f5c1a81441c1ab0cb715e458eaKeisuke Kuroyanagi    // TODO: Remove after stopping supporting v402.
479069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi    // Add an n-gram entry.
489069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi    bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
4979bb37d499ed6fcabe981153d5ff0b5b69509933Keisuke Kuroyanagi            const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
502fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
510c0b8207cdabc1f5c1a81441c1ab0cb715e458eaKeisuke Kuroyanagi    // TODO: Remove after stopping supporting v402.
529069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi    // Remove an n-gram entry.
539069d30043d5182dfd38465ad9bbc11ad73fab7cKeisuke Kuroyanagi    bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
542fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
552fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    // Add a shortcut target.
5689a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi    bool addShortcutTarget(const int wordPos, const CodePointArrayView targetCodePoints,
5789a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const int shortcutProbability);
582fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
592fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa private:
602fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
612fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
622fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    static const int CHILDREN_POSITION_FIELD_SIZE;
632fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
642fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    BufferWithExtendableBuffer *const mBuffer;
652fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    const PtNodeReader *const mPtNodeReader;
662fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    PtNodeWriter *const mPtNodeWriter;
672fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
6889a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi    bool createAndInsertNodeIntoPtNodeArray(const int parentPos,
6989a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const CodePointArrayView ptNodeCodePoints, const UnigramProperty *const unigramProperty,
70b636e25e951e48e071d5348756413d6fc065632dKeisuke Kuroyanagi            int *const forwardLinkFieldPos);
712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
72b636e25e951e48e071d5348756413d6fc065632dKeisuke Kuroyanagi    bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
73b636e25e951e48e071d5348756413d6fc065632dKeisuke Kuroyanagi            const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
742fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
752fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
7689a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const UnigramProperty *const unigramProperty,
7789a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const CodePointArrayView remainingCodePoints);
782fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
7989a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi    bool createNewPtNodeArrayWithAChildPtNode(const int parentPos,
8089a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const CodePointArrayView ptNodeCodePoints,
8189a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const UnigramProperty *const unigramProperty);
822fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
8389a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi    bool reallocatePtNodeAndAddNewPtNodes(const PtNodeParams *const reallocatingPtNodeParams,
8489a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const size_t overlappingCodePointCount, const UnigramProperty *const unigramProperty,
8589a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const CodePointArrayView newPtNodeCodePoints);
862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
872fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
8805172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu            const bool isNotAWord, const bool isPossiblyOffensive, const bool isTerminal,
8989a074fade9154070e34344f1b8f53b516abeaffKeisuke Kuroyanagi            const int parentPos, const CodePointArrayView codePoints, const int probability) const;
902fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
9105172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu    const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord,
9205172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu            const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
9305172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu            const CodePointArrayView codePoints, const int probability) const;
942fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa};
952fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa} // namespace latinime
#endif /* LATINIME_DYNAMIC_PT_UPDATING_HELPER_H */