/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
17923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project#ifndef LATINIME_DICTIONARY_H
18923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project#define LATINIME_DICTIONARY_H
19923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
204ce480d5ce2d47f607448ce439aaf2cefba1bdd8Keisuke Kuroyanagi#include <memory>
211ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard
22e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#include "defines.h"
237eba0198c0344ce7aac9867711d7944811dd3ad5Jean Chalard#include "jni.h"
2488bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/dictionary_header_structure_policy.h"
2588bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
2688bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/ngram_listener.h"
2788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/historical_info.h"
2888bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/word_property.h"
292fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa#include "suggest/core/suggest_interface.h"
3011a48f92a517b2c8b1899bc57ab9dfe6c48209ebKeisuke Kuroyanagi#include "utils/int_array_view.h"
3130088259480130e5bac5c2028e2c7c3e6d4c51a2satok
32923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectnamespace latinime {
33923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
// Forward declarations of the types named in the Dictionary interface (kept alphabetical).
class DicTraverseSession;
class DictionaryStructureWithBufferPolicy;
class NgramContext;
class ProximityInfo;
class SuggestOptions;
class SuggestionResults;
4077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
41923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectclass Dictionary {
42e12e9b5b69e6242af61ee690a81bedde1bdd4936Ken Wakasa public:
43c7387a4fd065ad6782b0705e56f9556ac9cf127fJean Chalard    // Taken from SuggestedWords.java
4499e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard    static const int KIND_MASK_KIND = 0xFF; // Mask to get only the kind
45ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_TYPED = 0; // What user typed
46ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_CORRECTION = 1; // Simple correction/suggestion
47ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_COMPLETION = 2; // Completion (suggestion with appended chars)
48ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_WHITELIST = 3; // Whitelisted word
49ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_BLACKLIST = 4; // Blacklisted word
50ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_HARDCODED = 5; // Hardcoded suggestion, e.g. punctuation
51ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_APP_DEFINED = 6; // Suggested by the application
52ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_SHORTCUT = 7; // A shortcut
53ee456af9d4c7ce4fe2dc0513a8b2cf08c21b5f21Ken Wakasa    static const int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input)
54599d8778f475f5d2f4db9ae34ad9f963d7b5f261Satoshi Kataoka    // KIND_RESUMED: A resumed suggestion (comes from a span, currently this type is used only
55599d8778f475f5d2f4db9ae34ad9f963d7b5f261Satoshi Kataoka    // in java for re-correction)
56599d8778f475f5d2f4db9ae34ad9f963d7b5f261Satoshi Kataoka    static const int KIND_RESUMED = 9;
57599d8778f475f5d2f4db9ae34ad9f963d7b5f261Satoshi Kataoka    static const int KIND_OOV_CORRECTION = 10; // Most probable string correction
58c7387a4fd065ad6782b0705e56f9556ac9cf127fJean Chalard
5999e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard    static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
6099e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard    static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
6199e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard    static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
62a6278eb9c1fbe102259cba392b1459f712ca46e7Keisuke Kuroyanagi    static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
63515c5081352a6b8feaaf570df7ddb6d3676006e7Keisuke Kuroyanagi    static const int KIND_FLAG_APPROPRIATE_FOR_AUTOCORRECTION = 0x10000000;
6499e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard
654ce480d5ce2d47f607448ce439aaf2cefba1bdd8Keisuke Kuroyanagi    Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
664ce480d5ce2d47f607448ce439aaf2cefba1bdd8Keisuke Kuroyanagi            dictionaryStructureWithBufferPolicy);
671147c7bac9bf5367fe81576204a946f6f1248798satok
68d73edf23aca59e6a0a83a79cf24db3850ef473ffKeisuke Kuroyanagi    void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
697c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka            int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
7072e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi            int inputSize, const NgramContext *const ngramContext,
716da9b21191dc7d6049d96945366ec7e605e716e6Jean Chalard            const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
72d73edf23aca59e6a0a83a79cf24db3850ef473ffKeisuke Kuroyanagi            SuggestionResults *const outSuggestionResults) const;
7330088259480130e5bac5c2028e2c7c3e6d4c51a2satok
7472e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    void getPredictions(const NgramContext *const ngramContext,
75e137ec0a91cf93b0a99fd1e1556ee835d026f731Keisuke Kuroyanagi            SuggestionResults *const outSuggestionResults) const;
768fbd55229243cb66c03d5ea1f79dfb39f596590dsatok
773e75c59133000d16d3b5606f67d4ec60988851d8Keisuke Kuroyanagi    int getProbability(const CodePointArrayView codePoints) const;
785bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
793e75c59133000d16d3b5606f67d4ec60988851d8Keisuke Kuroyanagi    int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
80d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
8172e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    int getNgramProbability(const NgramContext *const ngramContext,
823e75c59133000d16d3b5606f67d4ec60988851d8Keisuke Kuroyanagi            const CodePointArrayView codePoints) const;
835bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
843e75c59133000d16d3b5606f67d4ec60988851d8Keisuke Kuroyanagi    bool addUnigramEntry(const CodePointArrayView codePoints,
85793124855de9dabb9e85b1e06619716649f087c5Keisuke Kuroyanagi            const UnigramProperty *const unigramProperty);
865bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
873e75c59133000d16d3b5606f67d4ec60988851d8Keisuke Kuroyanagi    bool removeUnigramEntry(const CodePointArrayView codePoints);
88f12985245c962779f0b422b3072cffe533b4edfbKeisuke Kuroyanagi
8988bb28c132d87f15a52e9a0b8a45950f39eb19adKeisuke Kuroyanagi    bool addNgramEntry(const NgramProperty *const ngramProperty);
905bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
9172e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    bool removeNgramEntry(const NgramContext *const ngramContext,
923e75c59133000d16d3b5606f67d4ec60988851d8Keisuke Kuroyanagi            const CodePointArrayView codePoints);
935bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
9472e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
9529777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi            const CodePointArrayView codePoints, const bool isValidWord,
9629777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi            const HistoricalInfo historicalInfo);
9729777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi
98dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi    bool flush(const char *const filePath);
99d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
100dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi    bool flushWithGC(const char *const filePath);
101d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
102c18510049a3422c88ed3ab3bbc64944c94a611fdKeisuke Kuroyanagi    bool needsToRunGC(const bool mindsBlockByGC);
103d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
1042fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    void getProperty(const char *const query, const int queryLength, char *const outResult,
105699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi            const int maxResultLength);
10631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi
1073e75c59133000d16d3b5606f67d4ec60988851d8Keisuke Kuroyanagi    const WordProperty getWordProperty(const CodePointArrayView codePoints);
1082fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
10938f341a2a53a04ce4195a0cb99fcb6e71203dec0Keisuke Kuroyanagi    // Method to iterate all words in the dictionary.
11038f341a2a53a04ce4195a0cb99fcb6e71203dec0Keisuke Kuroyanagi    // The returned token has to be used to get the next word. If token is 0, this method newly
11138f341a2a53a04ce4195a0cb99fcb6e71203dec0Keisuke Kuroyanagi    // starts iterating the dictionary.
112f7322b166b88f72b19509d8416700d4ec8ea7753Keisuke Kuroyanagi    int getNextWordAndNextToken(const int token, int *const outCodePoints,
113f7322b166b88f72b19509d8416700d4ec8ea7753Keisuke Kuroyanagi            int *const outCodePointCount);
11438f341a2a53a04ce4195a0cb99fcb6e71203dec0Keisuke Kuroyanagi
115d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi    const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
1162fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        return mDictionaryStructureWithBufferPolicy.get();
117d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi    }
118d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi
119e12e9b5b69e6242af61ee690a81bedde1bdd4936Ken Wakasa private:
1201bc038c5e40bd0fa7a44331a569abe3ae88f0152satok    DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
121e90b333017c68e888a5e3d351f07ea29036457d0Ken Wakasa
1224ce480d5ce2d47f607448ce439aaf2cefba1bdd8Keisuke Kuroyanagi    typedef std::unique_ptr<SuggestInterface> SuggestInterfacePtr;
1232fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
1242d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi    class NgramListenerForPrediction : public NgramListener {
1252d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi     public:
12672e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi        NgramListenerForPrediction(const NgramContext *const ngramContext,
12711a48f92a517b2c8b1899bc57ab9dfe6c48209ebKeisuke Kuroyanagi                const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults,
1282d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi                const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
12994e4cd25a8f7417d30a0832f7476d39ece1df788Keisuke Kuroyanagi        virtual void onVisitEntry(const int ngramProbability, const int targetWordId);
1302d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi
1312d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi     private:
1322d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi        DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
1332d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi
13472e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi        const NgramContext *const mNgramContext;
13511a48f92a517b2c8b1899bc57ab9dfe6c48209ebKeisuke Kuroyanagi        const WordIdArrayView mPrevWordIds;
1362d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi        SuggestionResults *const mSuggestionResults;
1372d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
1382d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi    };
1392d57b3339ad5b4bbf0939858c36c7daf5e38a4cbKeisuke Kuroyanagi
1405ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
1415ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi
1422fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
1432fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            mDictionaryStructureWithBufferPolicy;
1442fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    const SuggestInterfacePtr mGestureSuggest;
1452fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    const SuggestInterfacePtr mTypingSuggest;
14676e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi
14776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    void logDictionaryInfo(JNIEnv *const env) const;
148923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project};
149ce9e52a12a6af8fca0eba42aaae24602fbd5c998Ken Wakasa} // namespace latinime
150923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project#endif // LATINIME_DICTIONARY_H
151