dictionary.cpp revision c18510049a3422c88ed3ab3bbc64944c94a611fd
1923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project/* 20bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Copyright (C) 2009, The Android Open Source Project 30bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 40bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Licensed under the Apache License, Version 2.0 (the "License"); 50bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * you may not use this file except in compliance with the License. 60bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * You may obtain a copy of the License at 70bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 80bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * http://www.apache.org/licenses/LICENSE-2.0 90bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 100bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Unless required by applicable law or agreed to in writing, software 110bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * distributed under the License is distributed on an "AS IS" BASIS, 120bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 130bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * See the License for the specific language governing permissions and 140bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * limitations under the License. 150bbb917d12358e0264796e75dea888f244761b64Ken Wakasa */ 16923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 17e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#define LOG_TAG "LatinIME: dictionary.cpp" 18e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 19a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dictionary/dictionary.h" 20b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa 2177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include <stdint.h> 2277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 233b088a2f365a9ce06f58243c83cb961ea2920b7eKen Wakasa#include "defines.h" 24d18112046af044d1a13d5399087e14a0d230cfedSatoshi Kataoka#include "suggest/core/dictionary/bigram_dictionary.h" 2576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_header_structure_policy.h" 260624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 277c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka#include "suggest/core/session/dic_traverse_session.h" 28f497e1e5a0be0a6268bd5f27e6f61308c69c7b63Ken Wakasa#include "suggest/core/suggest.h" 29f497e1e5a0be0a6268bd5f27e6f61308c69c7b63Ken Wakasa#include "suggest/core/suggest_options.h" 30b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" 31b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" 3276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi#include "utils/log_utils.h" 33d4952c8fe9e71c2b313a68077e03d64c8b5196dbsatok 34923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectnamespace latinime { 35923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 365ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagiconst int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; 375ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi 380624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke KuroyanagiDictionary::Dictionary(JNIEnv *env, 390624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy) 400624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy), 41d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)), 42e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), 43e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { 4476e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi logDictionaryInfo(env); 45923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} 46923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 47662fe69ba2b8513a1a6640adde917db9a13e98afsatokDictionary::~Dictionary() { 4830088259480130e5bac5c2028e2c7c3e6d4c51a2satok delete mBigramDictionary; 49ffd08e37881e9e7f403d04c1a1a8aaba409d36b5Ken Wakasa delete mGestureSuggest; 50609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang delete mTypingSuggest; 51d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi delete mDictionaryStructureWithBufferPolicy; 52923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} 53e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 547c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataokaint Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, 555db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, 56338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint, 57338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi const SuggestOptions *const suggestOptions, int *outWords, int *frequencies, 58338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi int *spaceIndices, int *outputTypes) const { 5977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa int result = 0; 60338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi if (suggestOptions->isGesture()) { 617c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka DicTraverseSession::initSessionInstance( 62338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); 635db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, 645db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, 655db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa frequencies, spaceIndices, outputTypes); 66586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka if (DEBUG_DICT) { 675db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa DUMP_RESULT(outWords, frequencies); 68586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka } 6977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa return result; 7077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa } else { 716c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka DicTraverseSession::initSessionInstance( 726c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); 736c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, 746c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, 756c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka outWords, frequencies, spaceIndices, outputTypes); 766c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka if (DEBUG_DICT) { 776c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka DUMP_RESULT(outWords, frequencies); 78609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang } 796c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka return result; 8077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa } 8177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa} 8277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 832a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagiint Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies, 842a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagi int *outputTypes) const { 8577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa if (length <= 0) return 0; 862a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagi return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes); 8777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa} 8877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 89e0e67373735918c78eaeaf24f127e1d28816aa29Satoshi Kataokaint Dictionary::getProbability(const int *word, int length) const { 90d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, 916c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka false /* forceLowerCaseSearch */); 92cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (NOT_A_DICT_POS == pos) { 936c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka return NOT_A_PROBABILITY; 946c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka } 9565d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); 96e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok} 97e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 984d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagiint Dictionary::getBigramProbability(const int *word0, int length0, const int *word1, 994d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi int length1) const { 1004d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi return mBigramDictionary->getBigramProbability(word0, length0, word1, length1); 1015bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi} 1025bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi 1035bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::addUnigramWord(const int *const word, const int length, const int probability) { 10466facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability); 1055bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi} 1065bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi 1075bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, 1085bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi const int length1, const int probability) { 10966facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1, 11066facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi probability); 1115bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi} 1125bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi 1135bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::removeBigramWords(const int *const word0, const int length0, 1145bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi const int *const word1, const int length1) { 11566facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1); 1164d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang} 117ede2333640accecc066de328cb4f93e03e4bc5d7Tom Ouyang 118d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid Dictionary::flush(const char *const filePath) { 119d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->flush(filePath); 120d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 121d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 122d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid Dictionary::flushWithGC(const char *const filePath) { 123d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); 124d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 125d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 126c18510049a3422c88ed3ab3bbc64944c94a611fdKeisuke Kuroyanagibool Dictionary::needsToRunGC(const bool mindsBlockByGC) { 127c18510049a3422c88ed3ab3bbc64944c94a611fdKeisuke Kuroyanagi return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); 128d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 129d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 13031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagivoid Dictionary::getProperty(const char *const query, char *const outResult, 13131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi const int maxResultLength) const { 13231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength); 13331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi} 13431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi 13576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagivoid Dictionary::logDictionaryInfo(JNIEnv *const env) const { 1365ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 1375ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 1385ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 13976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi const DictionaryHeaderStructurePolicy *const headerPolicy = 14076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 14176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer, 1425ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi HEADER_ATTRIBUTE_BUFFER_SIZE); 14376e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer, 1445ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi HEADER_ATTRIBUTE_BUFFER_SIZE); 1455ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, 1465ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi HEADER_ATTRIBUTE_BUFFER_SIZE); 1475ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi 1485ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 1495ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 1505ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 1515ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 1525ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 1535ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 1545ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 1555ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 1565ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 15776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi 15876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi LogUtils::logToJava(env, 1590624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi "Dictionary info: dictionary = %s ; version = %s ; date = %s", 1600624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer); 16176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi} 16276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi 163923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} // namespace latinime 164