dictionary.cpp revision 2111e3abc9c9c0ea0350b8470532bf636b78cdd7
1/* 2 * Copyright (C) 2009, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "LatinIME: dictionary.cpp" 18 19#include "suggest/core/dictionary/dictionary.h" 20 21#include "defines.h" 22#include "suggest/core/dictionary/dictionary_utils.h" 23#include "suggest/core/policy/dictionary_header_structure_policy.h" 24#include "suggest/core/result/suggestion_results.h" 25#include "suggest/core/session/dic_traverse_session.h" 26#include "suggest/core/session/prev_words_info.h" 27#include "suggest/core/suggest.h" 28#include "suggest/core/suggest_options.h" 29#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" 30#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" 31#include "utils/int_array_view.h" 32#include "utils/log_utils.h" 33#include "utils/time_keeper.h" 34 35namespace latinime { 36 37const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; 38 39Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr 40 dictionaryStructureWithBufferPolicy) 41 : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)), 42 mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), 43 mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { 44 logDictionaryInfo(env); 45} 46 47void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, 48 int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, 49 int inputSize, const PrevWordsInfo *const prevWordsInfo, 50 const SuggestOptions *const suggestOptions, const float languageWeight, 51 SuggestionResults *const outSuggestionResults) const { 52 TimeKeeper::setCurrentTime(); 53 traverseSession->init(this, prevWordsInfo, suggestOptions); 54 const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest; 55 suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, 56 ycoordinates, times, pointerIds, inputCodePoints, inputSize, 57 languageWeight, outSuggestionResults); 58 if (DEBUG_DICT) { 59 outSuggestionResults->dumpSuggestions(); 60 } 61} 62 63Dictionary::NgramListenerForPrediction::NgramListenerForPrediction( 64 const PrevWordsInfo *const prevWordsInfo, const WordIdArrayView prevWordIds, 65 SuggestionResults *const suggestionResults, 66 const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) 67 : mPrevWordsInfo(prevWordsInfo), mPrevWordIds(prevWordIds), 68 mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {} 69 70void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability, 71 const int targetWordId) { 72 if (targetWordId == NOT_A_WORD_ID) { 73 return; 74 } 75 if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) 76 && ngramProbability == NOT_A_PROBABILITY) { 77 return; 78 } 79 int targetWordCodePoints[MAX_WORD_LENGTH]; 80 int unigramProbability = 0; 81 const int codePointCount = mDictStructurePolicy-> 82 getCodePointsAndProbabilityAndReturnCodePointCount(targetWordId, MAX_WORD_LENGTH, 83 targetWordCodePoints, &unigramProbability); 84 if (codePointCount <= 0) { 85 return; 86 } 87 const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext( 88 mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */); 89 mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, 90 wordAttributes.getProbability()); 91} 92 93void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, 94 SuggestionResults *const outSuggestionResults) const { 95 TimeKeeper::setCurrentTime(); 96 int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 97 prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds, 98 true /* tryLowerCaseSearch */); 99 NgramListenerForPrediction listener(prevWordsInfo, 100 WordIdArrayView::fromFixedSizeArray(prevWordIds), outSuggestionResults, 101 mDictionaryStructureWithBufferPolicy.get()); 102 mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener); 103} 104 105int Dictionary::getProbability(const int *word, int length) const { 106 return getNgramProbability(nullptr /* prevWordsInfo */, word, length); 107} 108 109int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const { 110 TimeKeeper::setCurrentTime(); 111 return DictionaryUtils::getMaxProbabilityOfExactMatches( 112 mDictionaryStructureWithBufferPolicy.get(), word, length); 113} 114 115int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, 116 int length) const { 117 TimeKeeper::setCurrentTime(); 118 int wordId = mDictionaryStructureWithBufferPolicy->getWordId( 119 CodePointArrayView(word, length), false /* forceLowerCaseSearch */); 120 if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY; 121 if (!prevWordsInfo) { 122 return getDictionaryStructurePolicy()->getProbabilityOfWord( 123 nullptr /* prevWordsPtNodePos */, wordId); 124 } 125 int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 126 prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds, 127 true /* tryLowerCaseSearch */); 128 return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId); 129} 130 131bool Dictionary::addUnigramEntry(const int *const word, const int length, 132 const UnigramProperty *const unigramProperty) { 133 if (unigramProperty->representsBeginningOfSentence() 134 && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy() 135 ->supportsBeginningOfSentence()) { 136 AKLOGE("The dictionary doesn't support Beginning-of-Sentence."); 137 return false; 138 } 139 TimeKeeper::setCurrentTime(); 140 return mDictionaryStructureWithBufferPolicy->addUnigramEntry(CodePointArrayView(word, length), 141 unigramProperty); 142} 143 144bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) { 145 TimeKeeper::setCurrentTime(); 146 return mDictionaryStructureWithBufferPolicy->removeUnigramEntry( 147 CodePointArrayView(codePoints, codePointCount)); 148} 149 150bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, 151 const BigramProperty *const bigramProperty) { 152 TimeKeeper::setCurrentTime(); 153 return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); 154} 155 156bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, 157 const int *const word, const int length) { 158 TimeKeeper::setCurrentTime(); 159 return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, 160 CodePointArrayView(word, length)); 161} 162 163bool Dictionary::flush(const char *const filePath) { 164 TimeKeeper::setCurrentTime(); 165 return mDictionaryStructureWithBufferPolicy->flush(filePath); 166} 167 168bool Dictionary::flushWithGC(const char *const filePath) { 169 TimeKeeper::setCurrentTime(); 170 return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); 171} 172 173bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { 174 TimeKeeper::setCurrentTime(); 175 return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); 176} 177 178void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult, 179 const int maxResultLength) { 180 TimeKeeper::setCurrentTime(); 181 return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult, 182 maxResultLength); 183} 184 185const WordProperty Dictionary::getWordProperty(const int *const codePoints, 186 const int codePointCount) { 187 TimeKeeper::setCurrentTime(); 188 return mDictionaryStructureWithBufferPolicy->getWordProperty( 189 CodePointArrayView(codePoints, codePointCount)); 190} 191 192int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints, 193 int *const outCodePointCount) { 194 TimeKeeper::setCurrentTime(); 195 return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken( 196 token, outCodePoints, outCodePointCount); 197} 198 199void Dictionary::logDictionaryInfo(JNIEnv *const env) const { 200 int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 201 int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 202 int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 203 const DictionaryHeaderStructurePolicy *const headerPolicy = 204 getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 205 headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer, 206 HEADER_ATTRIBUTE_BUFFER_SIZE); 207 headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer, 208 HEADER_ATTRIBUTE_BUFFER_SIZE); 209 headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, 210 HEADER_ATTRIBUTE_BUFFER_SIZE); 211 212 char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 213 char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 214 char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 215 intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 216 dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 217 intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 218 versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 219 intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 220 dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 221 222 LogUtils::logToJava(env, 223 "Dictionary info: dictionary = %s ; version = %s ; date = %s", 224 dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer); 225} 226 227} // namespace latinime 228