dictionary.cpp revision bd1f59bda5ad0b7028ec06c2de078f1623e76cdd
1/*
2 * Copyright (C) 2009, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "LatinIME: dictionary.cpp"
18
19#include "suggest/core/dictionary/dictionary.h"
20
21#include "defines.h"
22#include "suggest/core/dictionary/dictionary_utils.h"
23#include "suggest/core/policy/dictionary_header_structure_policy.h"
24#include "suggest/core/result/suggestion_results.h"
25#include "suggest/core/session/dic_traverse_session.h"
26#include "suggest/core/session/prev_words_info.h"
27#include "suggest/core/suggest.h"
28#include "suggest/core/suggest_options.h"
29#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
30#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
31#include "utils/log_utils.h"
32#include "utils/time_keeper.h"
33
34namespace latinime {
35
36const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
37
38Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
39        dictionaryStructureWithBufferPolicy)
40        : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
41          mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
42          mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
43    logDictionaryInfo(env);
44}
45
46void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
47        int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
48        int inputSize, const PrevWordsInfo *const prevWordsInfo,
49        const SuggestOptions *const suggestOptions, const float languageWeight,
50        SuggestionResults *const outSuggestionResults) const {
51    TimeKeeper::setCurrentTime();
52    traverseSession->init(this, prevWordsInfo, suggestOptions);
53    const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
54    suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
55            ycoordinates, times, pointerIds, inputCodePoints, inputSize,
56            languageWeight, outSuggestionResults);
57    if (DEBUG_DICT) {
58        outSuggestionResults->dumpSuggestions();
59    }
60}
61
62Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
63        const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const suggestionResults,
64        const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
65    : mPrevWordsInfo(prevWordsInfo), mSuggestionResults(suggestionResults),
66      mDictStructurePolicy(dictStructurePolicy) {}
67
68void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
69        const int targetPtNodePos) {
70    if (targetPtNodePos == NOT_A_DICT_POS) {
71        return;
72    }
73    if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
74            && ngramProbability == NOT_A_PROBABILITY) {
75        return;
76    }
77    int targetWordCodePoints[MAX_WORD_LENGTH];
78    int unigramProbability = 0;
79    const int codePointCount = mDictStructurePolicy->
80            getCodePointsAndProbabilityAndReturnCodePointCount(targetPtNodePos,
81                    MAX_WORD_LENGTH, targetWordCodePoints, &unigramProbability);
82    if (codePointCount <= 0) {
83        return;
84    }
85    const int probability = mDictStructurePolicy->getProbability(
86            unigramProbability, ngramProbability);
87    mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability);
88}
89
90void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
91        SuggestionResults *const outSuggestionResults) const {
92    TimeKeeper::setCurrentTime();
93    NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
94            mDictionaryStructureWithBufferPolicy.get());
95    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
96    prevWordsInfo->getPrevWordsTerminalPtNodePos(
97            mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
98            true /* tryLowerCaseSearch */);
99    mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsPtNodePos, &listener);
100}
101
102int Dictionary::getProbability(const int *word, int length) const {
103    return getNgramProbability(nullptr /* prevWordsInfo */, word, length);
104}
105
106int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
107    TimeKeeper::setCurrentTime();
108    return DictionaryUtils::getMaxProbabilityOfExactMatches(
109            mDictionaryStructureWithBufferPolicy.get(), word, length);
110}
111
112int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
113        int length) const {
114    TimeKeeper::setCurrentTime();
115    int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
116            length, false /* forceLowerCaseSearch */);
117    if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
118    if (!prevWordsInfo) {
119        return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
120                nullptr /* prevWordsPtNodePos */, nextWordPos);
121    }
122    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
123    prevWordsInfo->getPrevWordsTerminalPtNodePos(
124            mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
125            true /* tryLowerCaseSearch */);
126    return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPos);
127}
128
129bool Dictionary::addUnigramEntry(const int *const word, const int length,
130        const UnigramProperty *const unigramProperty) {
131    if (unigramProperty->representsBeginningOfSentence()
132            && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
133                    ->supportsBeginningOfSentence()) {
134        AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
135        return false;
136    }
137    TimeKeeper::setCurrentTime();
138    return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
139}
140
141bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
142    TimeKeeper::setCurrentTime();
143    return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints, codePointCount);
144}
145
146bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
147        const BigramProperty *const bigramProperty) {
148    TimeKeeper::setCurrentTime();
149    return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
150}
151
152bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
153        const int *const word, const int length) {
154    TimeKeeper::setCurrentTime();
155    return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
156}
157
158bool Dictionary::flush(const char *const filePath) {
159    TimeKeeper::setCurrentTime();
160    return mDictionaryStructureWithBufferPolicy->flush(filePath);
161}
162
163bool Dictionary::flushWithGC(const char *const filePath) {
164    TimeKeeper::setCurrentTime();
165    return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
166}
167
168bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
169    TimeKeeper::setCurrentTime();
170    return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
171}
172
173void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
174        const int maxResultLength) {
175    TimeKeeper::setCurrentTime();
176    return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult,
177            maxResultLength);
178}
179
180const WordProperty Dictionary::getWordProperty(const int *const codePoints,
181        const int codePointCount) {
182    TimeKeeper::setCurrentTime();
183    return mDictionaryStructureWithBufferPolicy->getWordProperty(
184            codePoints, codePointCount);
185}
186
187int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
188        int *const outCodePointCount) {
189    TimeKeeper::setCurrentTime();
190    return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
191            token, outCodePoints, outCodePointCount);
192}
193
194void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
195    int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
196    int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
197    int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
198    const DictionaryHeaderStructurePolicy *const headerPolicy =
199            getDictionaryStructurePolicy()->getHeaderStructurePolicy();
200    headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
201            HEADER_ATTRIBUTE_BUFFER_SIZE);
202    headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
203            HEADER_ATTRIBUTE_BUFFER_SIZE);
204    headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
205            HEADER_ATTRIBUTE_BUFFER_SIZE);
206
207    char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
208    char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
209    char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
210    intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
211            dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
212    intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
213            versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
214    intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
215            dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
216
217    LogUtils::logToJava(env,
218            "Dictionary info: dictionary = %s ; version = %s ; date = %s",
219            dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
220}
221
222} // namespace latinime
223