dictionary.cpp revision 2111e3abc9c9c0ea0350b8470532bf636b78cdd7
/*
 * Copyright (C) 2009, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#define LOG_TAG "LatinIME: dictionary.cpp"
18
19#include "suggest/core/dictionary/dictionary.h"
20
21#include "defines.h"
22#include "suggest/core/dictionary/dictionary_utils.h"
23#include "suggest/core/policy/dictionary_header_structure_policy.h"
24#include "suggest/core/result/suggestion_results.h"
25#include "suggest/core/session/dic_traverse_session.h"
26#include "suggest/core/session/prev_words_info.h"
27#include "suggest/core/suggest.h"
28#include "suggest/core/suggest_options.h"
29#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
30#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
31#include "utils/int_array_view.h"
32#include "utils/log_utils.h"
33#include "utils/time_keeper.h"
34
35namespace latinime {
36
37const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
38
39Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
40        dictionaryStructureWithBufferPolicy)
41        : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
42          mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
43          mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
44    logDictionaryInfo(env);
45}
46
47void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
48        int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
49        int inputSize, const PrevWordsInfo *const prevWordsInfo,
50        const SuggestOptions *const suggestOptions, const float languageWeight,
51        SuggestionResults *const outSuggestionResults) const {
52    TimeKeeper::setCurrentTime();
53    traverseSession->init(this, prevWordsInfo, suggestOptions);
54    const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
55    suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
56            ycoordinates, times, pointerIds, inputCodePoints, inputSize,
57            languageWeight, outSuggestionResults);
58    if (DEBUG_DICT) {
59        outSuggestionResults->dumpSuggestions();
60    }
61}
62
63Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
64        const PrevWordsInfo *const prevWordsInfo, const WordIdArrayView prevWordIds,
65        SuggestionResults *const suggestionResults,
66        const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
67    : mPrevWordsInfo(prevWordsInfo), mPrevWordIds(prevWordIds),
68      mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {}
69
70void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
71        const int targetWordId) {
72    if (targetWordId == NOT_A_WORD_ID) {
73        return;
74    }
75    if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
76            && ngramProbability == NOT_A_PROBABILITY) {
77        return;
78    }
79    int targetWordCodePoints[MAX_WORD_LENGTH];
80    int unigramProbability = 0;
81    const int codePointCount = mDictStructurePolicy->
82            getCodePointsAndProbabilityAndReturnCodePointCount(targetWordId, MAX_WORD_LENGTH,
83                    targetWordCodePoints, &unigramProbability);
84    if (codePointCount <= 0) {
85        return;
86    }
87    const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext(
88            mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */);
89    mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount,
90            wordAttributes.getProbability());
91}
92
93void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
94        SuggestionResults *const outSuggestionResults) const {
95    TimeKeeper::setCurrentTime();
96    int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
97    prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds,
98            true /* tryLowerCaseSearch */);
99    NgramListenerForPrediction listener(prevWordsInfo,
100            WordIdArrayView::fromFixedSizeArray(prevWordIds), outSuggestionResults,
101            mDictionaryStructureWithBufferPolicy.get());
102    mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
103}
104
105int Dictionary::getProbability(const int *word, int length) const {
106    return getNgramProbability(nullptr /* prevWordsInfo */, word, length);
107}
108
109int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
110    TimeKeeper::setCurrentTime();
111    return DictionaryUtils::getMaxProbabilityOfExactMatches(
112            mDictionaryStructureWithBufferPolicy.get(), word, length);
113}
114
115int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
116        int length) const {
117    TimeKeeper::setCurrentTime();
118    int wordId = mDictionaryStructureWithBufferPolicy->getWordId(
119            CodePointArrayView(word, length), false /* forceLowerCaseSearch */);
120    if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
121    if (!prevWordsInfo) {
122        return getDictionaryStructurePolicy()->getProbabilityOfWord(
123                nullptr /* prevWordsPtNodePos */, wordId);
124    }
125    int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
126    prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds,
127            true /* tryLowerCaseSearch */);
128    return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
129}
130
131bool Dictionary::addUnigramEntry(const int *const word, const int length,
132        const UnigramProperty *const unigramProperty) {
133    if (unigramProperty->representsBeginningOfSentence()
134            && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
135                    ->supportsBeginningOfSentence()) {
136        AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
137        return false;
138    }
139    TimeKeeper::setCurrentTime();
140    return mDictionaryStructureWithBufferPolicy->addUnigramEntry(CodePointArrayView(word, length),
141            unigramProperty);
142}
143
144bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
145    TimeKeeper::setCurrentTime();
146    return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(
147            CodePointArrayView(codePoints, codePointCount));
148}
149
150bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
151        const BigramProperty *const bigramProperty) {
152    TimeKeeper::setCurrentTime();
153    return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
154}
155
156bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
157        const int *const word, const int length) {
158    TimeKeeper::setCurrentTime();
159    return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo,
160            CodePointArrayView(word, length));
161}
162
163bool Dictionary::flush(const char *const filePath) {
164    TimeKeeper::setCurrentTime();
165    return mDictionaryStructureWithBufferPolicy->flush(filePath);
166}
167
168bool Dictionary::flushWithGC(const char *const filePath) {
169    TimeKeeper::setCurrentTime();
170    return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
171}
172
173bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
174    TimeKeeper::setCurrentTime();
175    return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
176}
177
178void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
179        const int maxResultLength) {
180    TimeKeeper::setCurrentTime();
181    return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult,
182            maxResultLength);
183}
184
185const WordProperty Dictionary::getWordProperty(const int *const codePoints,
186        const int codePointCount) {
187    TimeKeeper::setCurrentTime();
188    return mDictionaryStructureWithBufferPolicy->getWordProperty(
189            CodePointArrayView(codePoints, codePointCount));
190}
191
192int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
193        int *const outCodePointCount) {
194    TimeKeeper::setCurrentTime();
195    return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
196            token, outCodePoints, outCodePointCount);
197}
198
199void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
200    int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
201    int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
202    int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
203    const DictionaryHeaderStructurePolicy *const headerPolicy =
204            getDictionaryStructurePolicy()->getHeaderStructurePolicy();
205    headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
206            HEADER_ATTRIBUTE_BUFFER_SIZE);
207    headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
208            HEADER_ATTRIBUTE_BUFFER_SIZE);
209    headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
210            HEADER_ATTRIBUTE_BUFFER_SIZE);
211
212    char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
213    char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
214    char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
215    intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
216            dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
217    intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
218            versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
219    intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
220            dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
221
222    LogUtils::logToJava(env,
223            "Dictionary info: dictionary = %s ; version = %s ; date = %s",
224            dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
225}
226
227} // namespace latinime
228