1c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi/*
2c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Copyright (C) 2013, The Android Open Source Project
3c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
4c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Licensed under the Apache License, Version 2.0 (the "License");
5c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * you may not use this file except in compliance with the License.
6c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * You may obtain a copy of the License at
7c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
8c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *     http://www.apache.org/licenses/LICENSE-2.0
9c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi *
10c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * Unless required by applicable law or agreed to in writing, software
11c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * distributed under the License is distributed on an "AS IS" BASIS,
12c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * See the License for the specific language governing permissions and
14c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi * limitations under the License.
15c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi */
16c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
17c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
18c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#define LATINIME_PATRICIA_TRIE_POLICY_H
19c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
20e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi#include <stdint.h>
21e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi
22c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#include "defines.h"
23e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
24277053af7c1920f5312c3ff9e52bc741791e9690Keisuke Kuroyanagi#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
2523d3ed962f150578d98da7b9c61c0466d5697d93Keisuke Kuroyanagi#include "suggest/policyimpl/dictionary/header/header_policy.h"
26fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
27a72a33388f0e8acc20adf96372691886753e0adcKeisuke Kuroyanagi#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
28c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
29c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynaginamespace latinime {
30c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
31e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagiclass DicNode;
32e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagiclass DicNodeVector;
33e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi
34e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagiclass PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
35c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi public:
36a72a33388f0e8acc20adf96372691886753e0adcKeisuke Kuroyanagi    PatriciaTriePolicy(const MmappedBuffer *const buffer)
37484fa7b59cb0659ac18fa68da5c7b641d9255be8Keisuke Kuroyanagi            : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
380624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi              mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
39009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi              mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
40d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
41e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi
420624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi    ~PatriciaTriePolicy() {
430624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        delete mBuffer;
440624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi    }
45c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
46c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi    AK_FORCE_INLINE int getRootPosition() const {
47c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi        return 0;
48c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi    }
49c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
50c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi    void createAndGetAllChildNodes(const DicNode *const dicNode,
517fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            DicNodeVector *const childDicNodes) const;
52c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
531311cdcb6233abde792a9d9fdd294334c9be7043Keisuke Kuroynagi    int getCodePointsAndProbabilityAndReturnCodePointCount(
541311cdcb6233abde792a9d9fdd294334c9be7043Keisuke Kuroynagi            const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
55c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi            int *const outUnigramProbability) const;
56c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
57e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi    int getTerminalNodePositionOfWord(const int *const inWord,
58c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi            const int length, const bool forceLowerCaseSearch) const;
59c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
6065d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    int getProbability(const int unigramProbability, const int bigramProbability) const;
6165d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi
6277ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
63c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
6477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    int getShortcutPositionOfPtNode(const int ptNodePos) const;
65c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi
6677ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    int getBigramsPositionOfPtNode(const int ptNodePos) const;
67c38ec475962ad6b7f14abe35b950545ebcdbe3c5Keisuke Kuroynagi
6876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
6976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi        return &mHeaderPolicy;
70d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi    }
71d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi
72668870be431d17ee4ceb5ce161aee1189063af18Keisuke Kuroyanagi    const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
73668870be431d17ee4ceb5ce161aee1189063af18Keisuke Kuroyanagi        return &mBigramListPolicy;
74668870be431d17ee4ceb5ce161aee1189063af18Keisuke Kuroyanagi    }
75668870be431d17ee4ceb5ce161aee1189063af18Keisuke Kuroyanagi
76fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi    const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
77fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi        return &mShortcutListPolicy;
78fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi    }
79fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi
8066facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    bool addUnigramWord(const int *const word, const int length, const int probability) {
810624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        // This method should not be called for non-updatable dictionary.
820624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
8366facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi        return false;
8466facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    }
8566facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi
8666facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    bool addBigramWords(const int *const word0, const int length0, const int *const word1,
8766facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi            const int length1, const int probability) {
880624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        // This method should not be called for non-updatable dictionary.
890624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
9066facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi        return false;
9166facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    }
9266facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi
9366facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
9466facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi            const int length1) {
950624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        // This method should not be called for non-updatable dictionary.
960624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
9766facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi        return false;
9866facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    }
9966facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi
100d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    void flush(const char *const filePath) {
101d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi        // This method should not be called for non-updatable dictionary.
102d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
103d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    }
104d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
105d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    void flushWithGC(const char *const filePath) {
106d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi        // This method should not be called for non-updatable dictionary.
107d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
108d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    }
109d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
110c18510049a3422c88ed3ab3bbc64944c94a611fdKeisuke Kuroyanagi    bool needsToRunGC(const bool mindsBlockByGC) const {
111d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi        // This method should not be called for non-updatable dictionary.
112d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
113d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi        return false;
114d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    }
115d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
11631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi    void getProperty(const char *const query, char *const outResult,
117699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi            const int maxResultLength) {
11831097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        // getProperty is not supported for this class.
11931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        if (maxResultLength > 0) {
12031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi            outResult[0] = '\0';
12131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        }
12231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi    }
12331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi
124c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi private:
125e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
126c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi
127a72a33388f0e8acc20adf96372691886753e0adcKeisuke Kuroyanagi    const MmappedBuffer *const mBuffer;
12876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    const HeaderPolicy mHeaderPolicy;
129e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagi    const uint8_t *const mDictRoot;
130009dcac33f53bb92d0a8b7f0789a26568b04f014Keisuke Kuroyanagi    const int mDictBufferSize;
131668870be431d17ee4ceb5ce161aee1189063af18Keisuke Kuroyanagi    const BigramListPolicy mBigramListPolicy;
132fd10db04e02ddad88d0c6fca82583493955a7c7eKeisuke Kuroyanagi    const ShortcutListPolicy mShortcutListPolicy;
1331fb11da36ab279fa4fcc62d772d9cce877bf23bdKeisuke Kuroynagi
13477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
1357fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            DicNodeVector *const childDicNodes) const;
136c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi};
137c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi} // namespace latinime
138c5e6efafff56c57c5527fe64dddb851df0719634Keisuke Kuroynagi#endif // LATINIME_PATRICIA_TRIE_POLICY_H
139