dictionary.cpp revision d0246277fde27e9c40a270e206f1d106811e847f
1923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project/* 20bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Copyright (C) 2009, The Android Open Source Project 30bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 40bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Licensed under the Apache License, Version 2.0 (the "License"); 50bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * you may not use this file except in compliance with the License. 60bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * You may obtain a copy of the License at 70bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 80bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * http://www.apache.org/licenses/LICENSE-2.0 90bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 100bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Unless required by applicable law or agreed to in writing, software 110bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * distributed under the License is distributed on an "AS IS" BASIS, 120bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 130bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * See the License for the specific language governing permissions and 140bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * limitations under the License. 150bbb917d12358e0264796e75dea888f244761b64Ken Wakasa */ 16923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 17e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#define LOG_TAG "LatinIME: dictionary.cpp" 18e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 19a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dictionary/dictionary.h" 20b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa 2177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include <stdint.h> 2277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 233b088a2f365a9ce06f58243c83cb961ea2920b7eKen Wakasa#include "defines.h" 24d18112046af044d1a13d5399087e14a0d230cfedSatoshi Kataoka#include "suggest/core/dictionary/bigram_dictionary.h" 2576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_header_structure_policy.h" 260624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 277c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka#include "suggest/core/session/dic_traverse_session.h" 28f497e1e5a0be0a6268bd5f27e6f61308c69c7b63Ken Wakasa#include "suggest/core/suggest.h" 29f497e1e5a0be0a6268bd5f27e6f61308c69c7b63Ken Wakasa#include "suggest/core/suggest_options.h" 30b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" 31b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" 3276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi#include "utils/log_utils.h" 33d4952c8fe9e71c2b313a68077e03d64c8b5196dbsatok 34923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectnamespace latinime { 35923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 360624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke KuroyanagiDictionary::Dictionary(JNIEnv *env, 370624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy) 380624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy), 39d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)), 40e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), 41e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { 4276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi logDictionaryInfo(env); 43923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} 44923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 45662fe69ba2b8513a1a6640adde917db9a13e98afsatokDictionary::~Dictionary() { 4630088259480130e5bac5c2028e2c7c3e6d4c51a2satok delete mBigramDictionary; 47ffd08e37881e9e7f403d04c1a1a8aaba409d36b5Ken Wakasa delete mGestureSuggest; 48609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang delete mTypingSuggest; 49d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi delete mDictionaryStructureWithBufferPolicy; 50923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} 51e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 527c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataokaint Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, 535db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, 54338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint, 55338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi const SuggestOptions *const suggestOptions, int *outWords, int *frequencies, 56338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi int *spaceIndices, int *outputTypes) const { 5777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa int result = 0; 58338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi if (suggestOptions->isGesture()) { 597c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka DicTraverseSession::initSessionInstance( 60338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); 615db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, 625db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, 635db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa frequencies, spaceIndices, outputTypes); 64586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka if (DEBUG_DICT) { 655db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa DUMP_RESULT(outWords, frequencies); 66586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka } 6777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa return result; 6877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa } else { 696c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka DicTraverseSession::initSessionInstance( 706c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); 716c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, 726c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, 736c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka outWords, frequencies, spaceIndices, outputTypes); 746c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka if (DEBUG_DICT) { 756c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka DUMP_RESULT(outWords, frequencies); 76609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang } 776c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka return result; 7877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa } 7977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa} 8077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 812a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagiint Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies, 822a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagi int *outputTypes) const { 8377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa if (length <= 0) return 0; 842a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagi return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes); 8577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa} 8677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 87e0e67373735918c78eaeaf24f127e1d28816aa29Satoshi Kataokaint Dictionary::getProbability(const int *word, int length) const { 88d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, 896c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka false /* forceLowerCaseSearch */); 90cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (NOT_A_DICT_POS == pos) { 916c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka return NOT_A_PROBABILITY; 926c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka } 9365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); 94e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok} 95e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 965bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagibool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { 975bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi return mBigramDictionary->isValidBigram(word0, length0, word1, length1); 985bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi} 995bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi 1005bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::addUnigramWord(const int *const word, const int length, const int probability) { 10166facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability); 1025bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi} 1035bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi 1045bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, 1055bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi const int length1, const int probability) { 10666facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1, 10766facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi probability); 1085bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi} 1095bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi 1105bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::removeBigramWords(const int *const word0, const int length0, 1115bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi const int *const word1, const int length1) { 11266facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1); 1134d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang} 114ede2333640accecc066de328cb4f93e03e4bc5d7Tom Ouyang 115d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid Dictionary::flush(const char *const filePath) { 116d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->flush(filePath); 117d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 118d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 119d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid Dictionary::flushWithGC(const char *const filePath) { 120d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); 121d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 122d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 123d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagibool Dictionary::needsToRunGC() { 124d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi return mDictionaryStructureWithBufferPolicy->needsToRunGC(); 125d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 126d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 12776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagivoid Dictionary::logDictionaryInfo(JNIEnv *const env) const { 12876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi const int BUFFER_SIZE = 16; 12976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi int dictionaryIdCodePointBuffer[BUFFER_SIZE]; 13076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi int versionStringCodePointBuffer[BUFFER_SIZE]; 13176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi int dateStringCodePointBuffer[BUFFER_SIZE]; 13276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi const DictionaryHeaderStructurePolicy *const headerPolicy = 13376e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 13476e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer, 13576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi BUFFER_SIZE); 13676e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer, 13776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi BUFFER_SIZE); 13876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE); 13976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi 14076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi char dictionaryIdCharBuffer[BUFFER_SIZE]; 14176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi char versionStringCharBuffer[BUFFER_SIZE]; 14276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi char dateStringCharBuffer[BUFFER_SIZE]; 14376e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, 14476e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi dictionaryIdCharBuffer, BUFFER_SIZE); 14576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, 14676e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi versionStringCharBuffer, BUFFER_SIZE); 14776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, 14876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi dateStringCharBuffer, BUFFER_SIZE); 14976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi 15076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi LogUtils::logToJava(env, 1510624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi "Dictionary info: dictionary = %s ; version = %s ; date = %s", 1520624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer); 15376e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi} 15476e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi 155923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} // namespace latinime 156